Call generateWaitcnt unconditionally at the end of SIInsertWaitcnts::insertWaitcntInBlock. Even if we don't need to generate a new waitcnt instruction, it has the effect of combining or removing redundant waitcnts that were already present. Tests show various small improvements in waitcnt placement.
4895 lines
203 KiB
LLVM
4895 lines
203 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
|
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,CI %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,GFX89,VI %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,GFX89,GFX9 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
|
|
|
|
; Store a plain i1 argument to an undef addrspace(1) pointer.
define void @void_func_i1(i1 %arg0) #0 {
; CIGFX89-LABEL: void_func_i1:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: v_and_b32_e32 v0, 1, v0
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store i1 %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Zero-extend a zeroext i1 argument to i32, add 12, and store the sum to an
; undef addrspace(1) pointer.
define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
; CIGFX89-LABEL: void_func_i1_zeroext:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: v_or_b32_e32 v0, 12, v0
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_or_b32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %ext = zext i1 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Sign-extend a signext i1 argument to i32, add 12, and store the sum to an
; undef addrspace(1) pointer.
define void @void_func_i1_signext(i1 signext %arg0) #0 {
; CI-LABEL: void_func_i1_signext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i1_signext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i1_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %ext = sext i1 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Use an i1 argument directly as a branch condition: when %arg is false,
; control goes through bb1, which does a volatile store of 0 to an undef
; addrspace(1) pointer; both paths return from bb2.
define void @i1_arg_i1_use(i1 %arg) #0 {
; CIGFX89-LABEL: i1_arg_i1_use:
; CIGFX89: ; %bb.0: ; %bb
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: v_and_b32_e32 v0, 1, v0
; CIGFX89-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; CIGFX89-NEXT: s_xor_b64 s[6:7], vcc, -1
; CIGFX89-NEXT: s_and_saveexec_b64 s[4:5], s[6:7]
; CIGFX89-NEXT: s_cbranch_execz .LBB3_2
; CIGFX89-NEXT: ; %bb.1: ; %bb1
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: v_mov_b32_e32 v0, 0
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: .LBB3_2: ; %bb2
; CIGFX89-NEXT: s_or_b64 exec, exec, s[4:5]
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i1_arg_i1_use:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT: s_xor_b32 s1, vcc_lo, -1
; GFX11-NEXT: s_and_saveexec_b32 s0, s1
; GFX11-NEXT: s_cbranch_execz .LBB3_2
; GFX11-NEXT: ; %bb.1: ; %bb1
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: .LBB3_2: ; %bb2
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
bb:
  br i1 %arg, label %bb2, label %bb1

bb1:
  store volatile i32 0, ptr addrspace(1) undef
  br label %bb2

bb2:
  ret void
}
|
|
|
|
; Store a plain i8 argument to an undef addrspace(1) pointer.
define void @void_func_i8(i8 %arg0) #0 {
; CIGFX89-LABEL: void_func_i8:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store i8 %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Zero-extend a zeroext i8 argument to i32, add 12, and store the sum to an
; undef addrspace(1) pointer.
define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
; CI-LABEL: void_func_i8_zeroext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i8_zeroext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i8_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %ext = zext i8 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Sign-extend a signext i8 argument to i32, add 12, and store the sum to an
; undef addrspace(1) pointer.
define void @void_func_i8_signext(i8 signext %arg0) #0 {
; CI-LABEL: void_func_i8_signext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i8_signext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i8_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %ext = sext i8 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Store a plain i16 argument to an undef addrspace(1) pointer.
define void @void_func_i16(i16 %arg0) #0 {
; CIGFX89-LABEL: void_func_i16:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store i16 %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Zero-extend a zeroext i16 argument to i32, add 12, and store the sum to an
; undef addrspace(1) pointer.
define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
; CI-LABEL: void_func_i16_zeroext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i16_zeroext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i16_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %ext = zext i16 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Sign-extend a signext i16 argument to i32, add 12, and store the sum to an
; undef addrspace(1) pointer.
define void @void_func_i16_signext(i16 signext %arg0) #0 {
; CI-LABEL: void_func_i16_signext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i16_signext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i16_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %ext = sext i16 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Store an i32 argument to an undef addrspace(1) pointer.
define void @void_func_i32(i32 %arg0) #0 {
; CIGFX89-LABEL: void_func_i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store i32 %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Store an i64 argument to an undef addrspace(1) pointer.
define void @void_func_i64(i64 %arg0) #0 {
; CIGFX89-LABEL: void_func_i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store i64 %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Store a half argument to an undef addrspace(1) pointer.
define void @void_func_f16(half %arg0) #0 {
; CI-LABEL: void_func_f16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_f16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store half %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Store a float argument to an undef addrspace(1) pointer.
define void @void_func_f32(float %arg0) #0 {
; CIGFX89-LABEL: void_func_f32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store float %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Store a double argument to an undef addrspace(1) pointer.
define void @void_func_f64(double %arg0) #0 {
; CIGFX89-LABEL: void_func_f64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store double %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Store a <2 x i32> argument to an undef addrspace(1) pointer.
define void @void_func_v2i32(<2 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v2i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <2 x i32> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Store a <3 x i32> argument to an undef addrspace(1) pointer.
define void @void_func_v3i32(<3 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v3i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <3 x i32> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Store a <4 x i32> argument to an undef addrspace(1) pointer.
define void @void_func_v4i32(<4 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v4i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <4 x i32> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Store a <5 x i32> argument to an undef addrspace(1) pointer; the checks
; expect the store to be split into a one-dword and a four-dword store.
define void @void_func_v5i32(<5 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v5i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dword v4, off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b32 v4, off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <5 x i32> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Store a <8 x i32> argument to an undef addrspace(1) pointer; the checks
; expect two four-dword stores.
define void @void_func_v8i32(<8 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v8i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <8 x i32> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Store a <16 x i32> argument to an undef addrspace(1) pointer; the checks
; expect four four-dword stores.
define void @void_func_v16i32(<16 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v16i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <16 x i32> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Store a <32 x i32> argument to an undef addrspace(1) pointer. The checks
; expect v31 to be loaded through s32 before the store that consumes
; v[28:31].
define void @void_func_v32i32(<32 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v32i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(6)
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <32 x i32> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; 1 over register limit
; Store a <33 x i32> argument to an undef addrspace(1) pointer. The checks
; expect two dwords to be loaded through s32 (offsets 0 and 4) before the
; stores that consume them.
define void @void_func_v33i32(<33 x i32> %arg0) #0 {
; CI-LABEL: void_func_v33i32:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(5)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(5)
; CI-NEXT: buffer_store_dword v16, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v33i32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(5)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(5)
; VI-NEXT: buffer_store_dword v16, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v33i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(5)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(5)
; GFX9-NEXT: buffer_store_dword v16, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v33i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x5
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b32 v32, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <33 x i32> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Store a <2 x i64> argument to an undef addrspace(1) pointer.
define void @void_func_v2i64(<2 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v2i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <2 x i64> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Store a <3 x i64> argument to an undef addrspace(1) pointer; the checks
; expect a two-dword store followed by a four-dword store.
define void @void_func_v3i64(<3 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v3i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <3 x i64> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Store a <4 x i64> argument to an undef addrspace(1) pointer; the checks
; expect two four-dword stores.
define void @void_func_v4i64(<4 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v4i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store <4 x i64> %arg0, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Stores a <5 x i64> argument to an undef addrspace(1) pointer.
; The checks expect two 128-bit stores (v[4:7], v[0:3]) followed by a 64-bit
; store of the fifth element in v[8:9]; the GFX11 checks place all three
; stores under one s_clause.
define void @void_func_v5i64(<5 x i64> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v5i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[8:9], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v5i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x2
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b64 v[8:9], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <5 x i64> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores an <8 x i64> argument to an undef addrspace(1) pointer.
; The checks expect four 128-bit stores, emitted from the highest register
; group (v[12:15]) down to v[0:3]; the GFX11 checks group them in an s_clause.
define void @void_func_v8i64(<8 x i64> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v8i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x i64> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <16 x i64> argument to an undef addrspace(1) pointer.
; In the expected output the final dword of the argument is loaded from the
; stack at s32 into v31 (buffer_load_dword on the older targets,
; scratch_load_b32 on gfx11) while the rest is already in VGPRs --
; presumably because the vector does not fit in the argument registers;
; confirm against the calling convention.
; The store that reads v[28:31] must therefore be preceded by an s_waitcnt on
; vmcnt, and the stores not depending on the load are expected before it.
define void @void_func_v16i64(<16 x i64> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v16i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(6)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x i64> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x i8> argument to a null addrspace(1) pointer (not undef, unlike
; the i64 vector tests in this file).
; The two bytes arrive in v0 and v1 and are expected to be packed into one
; 16-bit value and written with a single short store. The CI and GFX11 checks
; pack with shift/and/or; the GFX89 checks use an SDWA v_or instead of the
; explicit mask.
define void @void_func_v2i8(<2 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v2i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: s_mov_b32 s4, 0
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_mov_b32 s5, s4
|
|
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v2i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: s_mov_b32 s4, 0
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: s_mov_b32 s5, s4
|
|
; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-NEXT: s_mov_b32 s0, 0
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_mov_b32 s1, s0
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x i16> argument to an undef addrspace(1) pointer.
; The CI checks expect the two halves in v0 and v1 to be combined with
; shift/and/or before a dword store. The GFX89 and GFX11 checks store v0
; directly with no packing instructions.
define void @void_func_v2i16(<2 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v2i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v2i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x i16> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Stores a <3 x i8> argument to a null addrspace(1) pointer.
; The checks expect two stores: a byte store of the third element (v2) issued
; while the low scalar of the resource (s4, or s0 on gfx11) holds 2, then a
; short store of the packed v0/v1 pair after that scalar is reset to 0.
; NOTE(review): that scalar looks like the low half of the base address, i.e.
; the byte lands at offset 2 -- confirm against the buffer descriptor layout.
define void @void_func_v3i8(<3 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v3i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: s_mov_b32 s5, 0
|
|
; CI-NEXT: s_mov_b32 s4, 2
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: buffer_store_byte v2, off, s[4:7], 0
|
|
; CI-NEXT: s_mov_b32 s4, s5
|
|
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v3i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: s_mov_b32 s5, 0
|
|
; GFX89-NEXT: s_mov_b32 s4, 2
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: buffer_store_byte v2, off, s[4:7], 0
|
|
; GFX89-NEXT: s_mov_b32 s4, s5
|
|
; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-NEXT: s_mov_b32 s1, 0
|
|
; GFX11-NEXT: s_mov_b32 s0, 2
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: buffer_store_b8 v2, off, s[0:3], 0
|
|
; GFX11-NEXT: s_mov_b32 s0, s1
|
|
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x i8> argument to a null addrspace(1) pointer.
; The four bytes arrive in v0..v3 and are expected to be packed into a single
; dword in v0, then written with one dword store. The CI and GFX11 checks
; pack with and/shift/or sequences; the GFX89 checks use SDWA v_or
; instructions. The GFX11 checks also include s_delay_alu hints between
; dependent VALU instructions.
define void @void_func_v4i8(<4 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v4i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: v_or_b32_e32 v2, v3, v2
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: s_mov_b32 s4, 0
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v2
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_mov_b32 s5, s4
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v4i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
|
|
; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s4, 0
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: s_mov_b32 s5, s4
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
|
|
; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; GFX11-NEXT: s_mov_b32 s0, 0
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-NEXT: s_mov_b32 s1, s0
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Stores a <5 x i8> argument to a null addrspace(1) pointer.
; The checks expect two stores: a byte store of the fifth element (v4) issued
; while the low scalar of the resource (s4, or s0 on gfx11) holds 4, then a
; dword store of the first four bytes packed into v0 after that scalar is
; reset to 0.
; NOTE(review): the value 4 looks like the byte offset of the fifth element
; -- confirm against the buffer descriptor layout.
define void @void_func_v5i8(<5 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v5i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: v_or_b32_e32 v2, v3, v2
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: s_mov_b32 s5, 0
|
|
; CI-NEXT: s_mov_b32 s4, 4
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v2
|
|
; CI-NEXT: buffer_store_byte v4, off, s[4:7], 0
|
|
; CI-NEXT: s_mov_b32 s4, s5
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v5i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
|
|
; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s5, 0
|
|
; GFX89-NEXT: s_mov_b32 s4, 4
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: buffer_store_byte v4, off, s[4:7], 0
|
|
; GFX89-NEXT: s_mov_b32 s4, s5
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v5i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
|
|
; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; GFX11-NEXT: s_mov_b32 s1, 0
|
|
; GFX11-NEXT: s_mov_b32 s0, 4
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-NEXT: buffer_store_b8 v4, off, s[0:3], 0
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-NEXT: s_mov_b32 s0, s1
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <5 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Stores an <8 x i8> argument to a null addrspace(1) pointer.
; The eight bytes arrive in v0..v7 and are expected to be packed into two
; dwords, then written with a single 64-bit store (v[3:4] on the older
; targets, v[0:1] on gfx11). The CI and GFX11 checks pack with and/shift/or
; sequences; the GFX89 checks use SDWA v_or instructions.
define void @void_func_v8i8(<8 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v8i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v7, 24, v7
|
|
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: v_or_b32_e32 v6, v7, v6
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; CI-NEXT: v_or_b32_e32 v2, v3, v2
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: s_mov_b32 s4, 0
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v6
|
|
; CI-NEXT: v_or_b32_e32 v3, v0, v2
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_mov_b32 s5, s4
|
|
; CI-NEXT: buffer_store_dwordx2 v[3:4], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v8i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v7
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
|
|
; GFX89-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s4, 0
|
|
; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: s_mov_b32 s5, s4
|
|
; GFX89-NEXT: buffer_store_dwordx2 v[3:4], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5
|
|
; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7
|
|
; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
|
|
; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; GFX11-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; GFX11-NEXT: v_or_b32_e32 v5, v6, v7
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: s_mov_b32 s0, 0
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v4
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v5
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 16, v1
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-NEXT: s_mov_b32 s1, s0
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Stores a <16 x i8> argument to a null addrspace(1) pointer.
; The sixteen bytes arrive in v0..v15 and are expected to be packed into four
; dwords, then written with a single 128-bit store (v[9:12] on the older
; targets, v[0:3] on gfx11). The CI and GFX11 checks pack with and/shift/or
; sequences; the GFX89 checks use SDWA v_or instructions.
define void @void_func_v16i8(<16 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v16i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v13
|
|
; CI-NEXT: v_and_b32_e32 v12, 0xff, v12
|
|
; CI-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
; CI-NEXT: v_lshlrev_b32_e32 v9, 8, v9
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; CI-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v15, 24, v15
|
|
; CI-NEXT: v_lshlrev_b32_e32 v14, 16, v14
|
|
; CI-NEXT: v_or_b32_e32 v12, v12, v13
|
|
; CI-NEXT: v_lshlrev_b32_e32 v11, 24, v11
|
|
; CI-NEXT: v_lshlrev_b32_e32 v10, 16, v10
|
|
; CI-NEXT: v_or_b32_e32 v8, v8, v9
|
|
; CI-NEXT: v_lshlrev_b32_e32 v7, 24, v7
|
|
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: v_or_b32_e32 v14, v15, v14
|
|
; CI-NEXT: v_and_b32_e32 v12, 0xffff, v12
|
|
; CI-NEXT: v_or_b32_e32 v10, v11, v10
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; CI-NEXT: v_or_b32_e32 v6, v7, v6
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; CI-NEXT: v_or_b32_e32 v2, v3, v2
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: s_mov_b32 s4, 0
|
|
; CI-NEXT: v_or_b32_e32 v12, v12, v14
|
|
; CI-NEXT: v_or_b32_e32 v11, v8, v10
|
|
; CI-NEXT: v_or_b32_e32 v10, v4, v6
|
|
; CI-NEXT: v_or_b32_e32 v9, v0, v2
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_mov_b32 s5, s4
|
|
; CI-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v16i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v13
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v9
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v15
|
|
; GFX89-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v11
|
|
; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v7
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
|
|
; GFX89-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s4, 0
|
|
; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v11, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v10, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v9, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: s_mov_b32 s5, s4
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_lshlrev_b16 v13, 8, v13
|
|
; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12
|
|
; GFX11-NEXT: v_lshlrev_b16 v15, 8, v15
|
|
; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
; GFX11-NEXT: v_lshlrev_b16 v9, 8, v9
|
|
; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; GFX11-NEXT: v_lshlrev_b16 v11, 8, v11
|
|
; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5
|
|
; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7
|
|
; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
|
|
; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; GFX11-NEXT: v_or_b32_e32 v12, v12, v13
|
|
; GFX11-NEXT: v_or_b32_e32 v13, v14, v15
|
|
; GFX11-NEXT: v_or_b32_e32 v8, v8, v9
|
|
; GFX11-NEXT: v_or_b32_e32 v10, v10, v11
|
|
; GFX11-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; GFX11-NEXT: v_or_b32_e32 v5, v6, v7
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-NEXT: v_and_b32_e32 v9, 0xffff, v12
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v13
|
|
; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v10
|
|
; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v6, 16, v1
|
|
; GFX11-NEXT: v_or_b32_e32 v3, v9, v12
|
|
; GFX11-NEXT: v_or_b32_e32 v2, v8, v2
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v4, v5
|
|
; GFX11-NEXT: s_mov_b32 s0, 0
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v6
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_mov_b32 s1, s0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
define void @void_func_v32i8(<32 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v32i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; CI-NEXT: v_or_b32_e32 v2, v3, v2
|
|
; CI-NEXT: buffer_load_dword v3, off, s[0:3], s32
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; CI-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 24, v7
|
|
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6
|
|
; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v13
|
|
; CI-NEXT: v_and_b32_e32 v12, 0xff, v12
|
|
; CI-NEXT: v_or_b32_e32 v5, v5, v6
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: v_or_b32_e32 v12, v12, v13
|
|
; CI-NEXT: v_lshlrev_b32_e32 v9, 8, v9
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; CI-NEXT: v_and_b32_e32 v13, 0xff, v14
|
|
; CI-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
; CI-NEXT: v_or_b32_e32 v7, v4, v5
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v29
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xff, v28
|
|
; CI-NEXT: v_and_b32_e32 v6, 0xff, v26
|
|
; CI-NEXT: v_or_b32_e32 v8, v8, v9
|
|
; CI-NEXT: v_lshlrev_b32_e32 v9, 24, v15
|
|
; CI-NEXT: v_lshlrev_b32_e32 v13, 16, v13
|
|
; CI-NEXT: v_lshlrev_b32_e32 v11, 24, v11
|
|
; CI-NEXT: v_lshlrev_b32_e32 v10, 16, v10
|
|
; CI-NEXT: v_or_b32_e32 v1, v4, v1
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xff, v30
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 24, v27
|
|
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_or_b32_e32 v9, v9, v13
|
|
; CI-NEXT: v_or_b32_e32 v10, v11, v10
|
|
; CI-NEXT: v_and_b32_e32 v11, 0xffff, v12
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v4
|
|
; CI-NEXT: v_or_b32_e32 v5, v5, v6
|
|
; CI-NEXT: v_or_b32_e32 v6, v0, v2
|
|
; CI-NEXT: v_or_b32_e32 v9, v11, v9
|
|
; CI-NEXT: v_or_b32_e32 v8, v8, v10
|
|
; CI-NEXT: v_lshlrev_b32_e32 v10, 8, v25
|
|
; CI-NEXT: v_and_b32_e32 v11, 0xff, v24
|
|
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
|
; CI-NEXT: s_mov_b32 s5, 0
|
|
; CI-NEXT: s_mov_b32 s4, 16
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 24, v3
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; CI-NEXT: v_or_b32_e32 v3, v1, v0
|
|
; CI-NEXT: v_or_b32_e32 v0, v11, v10
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_and_b32_e32 v1, 0xff, v22
|
|
; CI-NEXT: v_or_b32_e32 v2, v0, v5
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 24, v23
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v21
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xff, v20
|
|
; CI-NEXT: v_or_b32_e32 v1, v4, v1
|
|
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v1
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xff, v18
|
|
; CI-NEXT: v_or_b32_e32 v1, v1, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 24, v19
|
|
; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v4
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; CI-NEXT: v_lshlrev_b32_e32 v4, 8, v17
|
|
; CI-NEXT: v_and_b32_e32 v5, 0xff, v16
|
|
; CI-NEXT: v_or_b32_e32 v4, v5, v4
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; CI-NEXT: v_or_b32_e32 v0, v4, v0
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_mov_b32 s4, s5
|
|
; CI-NEXT: buffer_store_dwordx4 v[6:9], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v32i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v13
|
|
; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v15
|
|
; GFX89-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: buffer_load_ubyte v14, off, s[0:3], s32
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v7
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v9
|
|
; GFX89-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v6, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v29
|
|
; GFX89-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v11
|
|
; GFX89-NEXT: v_or_b32_sdwa v7, v28, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v25
|
|
; GFX89-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v10, v24, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v27
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v2, 8, v23
|
|
; GFX89-NEXT: v_or_b32_sdwa v11, v26, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v21
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v3, 8, v17
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v15, 8, v19
|
|
; GFX89-NEXT: v_or_b32_sdwa v19, v22, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v2, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v17, v20, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v16, v16, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v15, v18, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s5, 0
|
|
; GFX89-NEXT: s_mov_b32 s4, 16
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v6, v10, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v5, v17, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v4, v16, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v3, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v8, 8, v14
|
|
; GFX89-NEXT: v_or_b32_sdwa v8, v30, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX89-NEXT: s_mov_b32 s4, s5
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: scratch_load_u8 v31, off, s32
|
|
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
|
|
; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; GFX11-NEXT: v_lshlrev_b16 v9, 8, v9
|
|
; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; GFX11-NEXT: v_lshlrev_b16 v11, 8, v11
|
|
; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
; GFX11-NEXT: v_lshlrev_b16 v17, 8, v17
|
|
; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v16
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-NEXT: v_lshlrev_b16 v13, 8, v13
|
|
; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12
|
|
; GFX11-NEXT: v_lshlrev_b16 v15, 8, v15
|
|
; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5
|
|
; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7
|
|
; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; GFX11-NEXT: v_lshlrev_b16 v29, 8, v29
|
|
; GFX11-NEXT: v_and_b32_e32 v28, 0xff, v28
|
|
; GFX11-NEXT: v_and_b32_e32 v30, 0xff, v30
|
|
; GFX11-NEXT: v_lshlrev_b16 v25, 8, v25
|
|
; GFX11-NEXT: v_and_b32_e32 v24, 0xff, v24
|
|
; GFX11-NEXT: v_lshlrev_b16 v27, 8, v27
|
|
; GFX11-NEXT: v_and_b32_e32 v26, 0xff, v26
|
|
; GFX11-NEXT: v_lshlrev_b16 v21, 8, v21
|
|
; GFX11-NEXT: v_and_b32_e32 v20, 0xff, v20
|
|
; GFX11-NEXT: v_lshlrev_b16 v23, 8, v23
|
|
; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22
|
|
; GFX11-NEXT: v_lshlrev_b16 v19, 8, v19
|
|
; GFX11-NEXT: v_and_b32_e32 v18, 0xff, v18
|
|
; GFX11-NEXT: v_or_b32_e32 v8, v8, v9
|
|
; GFX11-NEXT: v_or_b32_e32 v9, v10, v11
|
|
; GFX11-NEXT: v_or_b32_e32 v11, v16, v17
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v17, 16, v1
|
|
; GFX11-NEXT: v_or_b32_e32 v12, v12, v13
|
|
; GFX11-NEXT: v_or_b32_e32 v13, v14, v15
|
|
; GFX11-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; GFX11-NEXT: v_or_b32_e32 v5, v6, v7
|
|
; GFX11-NEXT: v_or_b32_e32 v2, v28, v29
|
|
; GFX11-NEXT: v_or_b32_e32 v3, v24, v25
|
|
; GFX11-NEXT: v_or_b32_e32 v6, v26, v27
|
|
; GFX11-NEXT: v_or_b32_e32 v7, v20, v21
|
|
; GFX11-NEXT: v_or_b32_e32 v10, v22, v23
|
|
; GFX11-NEXT: v_or_b32_e32 v14, v18, v19
|
|
; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v4
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v16, 16, v5
|
|
; GFX11-NEXT: v_and_b32_e32 v18, 0xffff, v2
|
|
; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v3
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v5, 16, v6
|
|
; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v7
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v10, 16, v10
|
|
; GFX11-NEXT: v_and_b32_e32 v11, 0xffff, v11
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v14, 16, v14
|
|
; GFX11-NEXT: v_and_b32_e32 v12, 0xffff, v12
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v13, 16, v13
|
|
; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v9, 16, v9
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-NEXT: v_or_b32_e32 v6, v4, v5
|
|
; GFX11-NEXT: v_or_b32_e32 v5, v7, v10
|
|
; GFX11-NEXT: v_or_b32_e32 v4, v11, v14
|
|
; GFX11-NEXT: v_or_b32_e32 v3, v12, v13
|
|
; GFX11-NEXT: v_or_b32_e32 v2, v8, v9
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v17
|
|
; GFX11-NEXT: s_mov_b32 s1, 0
|
|
; GFX11-NEXT: s_mov_b32 s0, 16
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v31
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v30, v1
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-NEXT: v_or_b32_e32 v7, v18, v1
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v15, v16
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: s_mov_b32 s0, s1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <32 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Stores a <3 x i16> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, hawaii shifts/ors v0 and v1 into one dword and
; stores v2 as the trailing short, while the other targets store v1 (short)
; and v0 (dword) without any repacking.
define void @void_func_v3i16(<3 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v3i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: buffer_store_short v2, off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v3i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x i16> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x i16> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, hawaii combines v0-v3 pairwise (shift, mask, or)
; into v[1:2] before a single 64-bit store; the other targets store v[0:1]
; as received.
define void @void_func_v4i16(<4 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v4i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_or_b32_e32 v2, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v1, v0, v1
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v4i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x i16> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <5 x i16> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, hawaii repacks v0-v3 into v[1:2] and stores v4
; as the trailing short; the other targets store v2 (short) and v[0:1]
; (64-bit) as received.
define void @void_func_v5i16(<5 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v5i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v2, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v1, v0, v1
|
|
; CI-NEXT: buffer_store_short v4, off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v5i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_short v2, off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v5i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b16 v2, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <5 x i16> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <8 x i16> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, hawaii combines v0-v7 pairwise into v[3:6]
; before one 128-bit store; the other targets store v[0:3] as received.
define void @void_func_v8i16(<8 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v8i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v7, 16, v7
|
|
; CI-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_or_b32_e32 v6, v6, v7
|
|
; CI-NEXT: v_or_b32_e32 v5, v4, v5
|
|
; CI-NEXT: v_or_b32_e32 v4, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v3, v0, v1
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v8i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x i16> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <16 x i16> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, hawaii combines v0-v15 pairwise into v[3:6] and
; v[11:14] for two 128-bit stores; the other targets store v[4:7] and v[0:3]
; as received.
define void @void_func_v16i16(<16 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v16i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_or_b32_e32 v5, v4, v5
|
|
; CI-NEXT: v_or_b32_e32 v4, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v3, v0, v1
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v15
|
|
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v14
|
|
; CI-NEXT: v_or_b32_e32 v14, v1, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v13
|
|
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v12
|
|
; CI-NEXT: v_or_b32_e32 v13, v1, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v11
|
|
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v10
|
|
; CI-NEXT: v_or_b32_e32 v12, v1, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v9
|
|
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v8
|
|
; CI-NEXT: v_lshlrev_b32_e32 v7, 16, v7
|
|
; CI-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
|
; CI-NEXT: v_or_b32_e32 v11, v1, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v6, v6, v7
|
|
; CI-NEXT: buffer_store_dwordx4 v[11:14], off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v16i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x i16> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Extracts both elements of a <2 x i24> argument, adds them, and stores the
; i24 sum to an undef global (addrspace 1) pointer.
; Per the expectations below, every target adds v0 and v1, then emits a byte
; store of the sum shifted right by 16 followed by a short store of the sum.
define void @void_func_v2i24(<2 x i24> %arg0) #0 {
|
|
; CI-LABEL: void_func_v2i24:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_add_i32_e32 v0, vcc, v0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v2i24:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
|
|
; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v2i24:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
|
|
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; GFX9-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2i24:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%elt0 = extractelement <2 x i24> %arg0, i32 0
|
|
%elt1 = extractelement <2 x i24> %arg0, i32 1
|
|
%add = add i24 %elt0, %elt1
|
|
store i24 %add, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x float> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, every target emits one 64-bit buffer store of
; v[0:1].
define void @void_func_v2f32(<2 x float> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v2f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x float> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <3 x float> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, every target emits one 96-bit buffer store of
; v[0:2].
define void @void_func_v3f32(<3 x float> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v3f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x float> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x float> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, every target emits one 128-bit buffer store of
; v[0:3].
define void @void_func_v4f32(<4 x float> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v4f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x float> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <8 x float> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, every target emits two 128-bit buffer stores,
; v[4:7] first and then v[0:3].
define void @void_func_v8f32(<8 x float> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v8f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x float> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <16 x float> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, every target emits four 128-bit buffer stores
; covering v[12:15] down to v[0:3].
define void @void_func_v16f32(<16 x float> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v16f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x float> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x double> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, every target emits one 128-bit buffer store of
; v[0:3].
define void @void_func_v2f64(<2 x double> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v2f64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x double> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <3 x double> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, every target emits a 64-bit buffer store of
; v[4:5] followed by a 128-bit buffer store of v[0:3].
define void @void_func_v3f64(<3 x double> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v3f64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x double> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x double> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, every target emits two 128-bit buffer stores,
; v[4:7] first and then v[0:3].
define void @void_func_v4f64(<4 x double> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v4f64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x double> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <8 x double> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, every target emits four 128-bit buffer stores
; covering v[12:15] down to v[0:3].
define void @void_func_v8f64(<8 x double> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v8f64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x double> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <16 x double> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, the final dword is first loaded into v31 from
; the stack at s32, and the store of v[28:31] is preceded by an s_waitcnt on
; vmcnt so that load has completed; the remaining dwords are stored straight
; from v0-v27.
define void @void_func_v16f64(<16 x double> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v16f64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(6)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x double> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x half> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, hawaii runs v_cvt_f16_f32 on v0 and v1 and packs
; the results into one dword before storing; the other targets store v0 as
; received.
define void @void_func_v2f16(<2 x half> %arg0) #0 {
|
|
; CI-LABEL: void_func_v2f16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v2f16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x half> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; FIXME: The ABI differs depending on whether f16 is a legal type.
|
|
; Stores a <3 x half> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, hawaii runs v_cvt_f16_f32 on v0-v2, packs v0/v1
; into one dword and stores v2 as a short; the other targets store v1 (short)
; and v0 (dword) as received.
define void @void_func_v3f16(<3 x half> %arg0) #0 {
|
|
; CI-LABEL: void_func_v3f16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: buffer_store_short v2, off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v3f16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x half> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x half> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, hawaii runs v_cvt_f16_f32 on v0-v3 and packs the
; results into v[0:1] before one 64-bit store; the other targets store v[0:1]
; as received.
define void @void_func_v4f16(<4 x half> %arg0) #0 {
|
|
; CI-LABEL: void_func_v4f16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v4, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v3
|
|
; CI-NEXT: v_or_b32_e32 v1, v2, v1
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v4
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v2
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v4f16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x half> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <8 x half> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, hawaii runs v_cvt_f16_f32 on v0-v7 and packs the
; results into v[2:5] before one 128-bit store; the other targets store v[0:3]
; as received.
define void @void_func_v8f16(<8 x half> %arg0) #0 {
|
|
; CI-LABEL: void_func_v8f16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v8, v5
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v7
|
|
; CI-NEXT: v_or_b32_e32 v5, v6, v5
|
|
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v8
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v6
|
|
; CI-NEXT: v_or_b32_e32 v3, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v2, v0, v1
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx4 v[2:5], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v8f16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x half> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <16 x half> argument to an undef global (addrspace 1) pointer.
; Per the expectations below, hawaii runs v_cvt_f16_f32 on v0-v15 and packs
; the results into v[2:5] and v[10:13] for two 128-bit stores; the other
; targets store v[4:7] and v[0:3] as received.
define void @void_func_v16f16(<16 x half> %arg0) #0 {
|
|
; CI-LABEL: void_func_v16f16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v16, v5
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v7
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_or_b32_e32 v5, v6, v5
|
|
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v16
|
|
; CI-NEXT: v_or_b32_e32 v3, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v2, v0, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v15
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v6
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v14
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v6, v13
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v7, v12
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_or_b32_e32 v13, v1, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v6
|
|
; CI-NEXT: v_or_b32_e32 v12, v7, v0
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v11
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v10
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v6, v9
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v7, v8
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_or_b32_e32 v11, v1, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v6
|
|
; CI-NEXT: v_or_b32_e32 v10, v7, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx4 v[10:13], off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dwordx4 v[2:5], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v16f16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x half> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Make sure there is no alignment requirement for passed VGPRs: the checks
; expect %arg0 in v0, the i64 %arg1 in the odd-starting pair v[1:2], and %arg2
; in v3. All three stores are volatile.
|
|
define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 {
|
|
; CIGFX89-LABEL: void_func_i32_i64_i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dword v3, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_i32_i64_i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b64 v[1:2], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b32 v3, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile i32 %arg0, ptr addrspace(1) undef
|
|
store volatile i64 %arg1, ptr addrspace(1) undef
|
|
store volatile i32 %arg2, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; A struct with a single i32 field passed by value: the checks expect the
; field in v0, written back with one dword store.
define void @void_func_struct_i32({ i32 } %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_struct_i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_struct_i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store { i32 } %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; { i8, i32 } passed by value: the checks expect the i8 field in v0 and the
; i32 field in v1, written back as a dword store followed by a byte store.
define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_struct_i8_i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dword v1, off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_struct_i8_i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b32 v1, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store { i8, i32 } %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; { i8, i32 } passed byval in addrspace(5): the checks expect the i8 field to
; be loaded from the stack at s32 and the i32 field at s32 offset:4 before
; both are written to the global pointer.
define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_byval_struct_i8_i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4
|
|
; CIGFX89-NEXT: buffer_load_ubyte v1, off, s[0:3], s32
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(1)
|
|
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(1)
|
|
; CIGFX89-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_byval_struct_i8_i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: scratch_load_b32 v0, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_u8 v1, off, s32
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%arg0.load = load { i8, i32 }, ptr addrspace(5) %arg0
|
|
store { i8, i32 } %arg0.load, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Two byval { i8, i32 } arguments followed by an i32. The checks expect the
; fields of %arg0 at s32 offsets 0 and 4 and the fields of %arg1 at offsets 8
; and 12. Every load and store is volatile; %arg2 is stored to addrspace(3),
; which the checks expect as a DS write (ds_write_b32 before GFX11,
; ds_store_b32 on GFX11).
define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }) %arg0, ptr addrspace(5) byval({ i8, i32 }) %arg1, i32 %arg2) #0 {
|
|
; CI-LABEL: void_func_byval_struct_i8_i32_x2:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_mov_b32 m0, -1
|
|
; CI-NEXT: buffer_store_dword v2, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dword v4, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v3, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: ds_write_b32 v0, v0
|
|
; CI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_byval_struct_i8_i32_x2:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: s_mov_b32 m0, -1
|
|
; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v3, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: ds_write_b32 v0, v0
|
|
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_byval_struct_i8_i32_x2:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_store_dword v2, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dword v4, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v3, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: ds_write_b32 v0, v0
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_byval_struct_i8_i32_x2:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: scratch_load_u8 v1, off, s32 glc dlc
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: scratch_load_b32 v2, off, s32 offset:4 glc dlc
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: scratch_load_u8 v3, off, s32 offset:8 glc dlc
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: scratch_load_b32 v4, off, s32 offset:12 glc dlc
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v2, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b32 v4, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v3, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: ds_store_b32 v0, v0
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%arg0.load = load volatile { i8, i32 }, ptr addrspace(5) %arg0
|
|
%arg1.load = load volatile { i8, i32 }, ptr addrspace(5) %arg1
|
|
store volatile { i8, i32 } %arg0.load, ptr addrspace(1) undef
|
|
store volatile { i8, i32 } %arg1.load, ptr addrspace(1) undef
|
|
store volatile i32 %arg2, ptr addrspace(3) undef
|
|
ret void
|
|
}
|
|
|
|
; A byval i32 followed by a byval i64: the checks expect the i32 at s32
; (offset 0) and the i64 at s32 offset:8. Before GFX11 the i64 is read as two
; dwords (offset:8 and offset:12); on GFX11 it is a single b64 scratch load.
define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, ptr addrspace(5) byval(i64) %arg1) #0 {
|
|
; CIGFX89-LABEL: void_func_byval_i32_byval_i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v2, off, s[0:3], s32
|
|
; CIGFX89-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8
|
|
; CIGFX89-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(2)
|
|
; CIGFX89-NEXT: buffer_store_dword v2, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(1)
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_byval_i32_byval_i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: scratch_load_b32 v2, off, s32
|
|
; GFX11-NEXT: scratch_load_b64 v[0:1], off, s32 offset:8
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b32 v2, off, s[0:3], 0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%arg0.load = load i32, ptr addrspace(5) %arg0
|
|
%arg1.load = load i64, ptr addrspace(5) %arg1
|
|
store i32 %arg0.load, ptr addrspace(1) undef
|
|
store i64 %arg1.load, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; <32 x i32> followed by an i32 and an i64. The checks expect v31 (the top
; dword of the first v[28:31] store, presumably element 31 of %arg0) to be
; loaded from the stack at s32, %arg1 from s32 offset:4, and the two halves of
; %arg2 from offset:8 and offset:12. All stores are volatile.
define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 {
|
|
; CI-LABEL: void_func_v32i32_i32_i64:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:12
|
|
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:8
|
|
; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dword v20, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v32i32_i32_i64:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:12
|
|
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:8
|
|
; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dword v20, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v32i32_i32_i64:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:12
|
|
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:8
|
|
; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dword v20, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_i32_i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:8
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b32 v34, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
|
|
store volatile i32 %arg1, ptr addrspace(1) undef
|
|
store volatile i64 %arg2, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; FIXME: Different ext load types on CI vs. VI. For the stack arguments at
; offset:8 through offset:20 the checks expect buffer_load_dword on CI but
; buffer_load_ushort on VI and GFX9; the i1 at offset:4 is a
; buffer_load_ubyte on all three.
;
; Expected stack layout after the <32 x i32>, per the checks: i1 at offset:4,
; i8 at offset:8, i16 at offset:12, half at offset:16, bfloat at offset:20.
; On CI the half goes through v_cvt_f16_f32 and the bfloat is taken from the
; upper 16 bits (v_lshrrev_b32 by 16) before being stored as a short.
; Note that the function name does not mention the half argument (%arg4).
|
|
define void @void_func_v32i32_i1_i8_i16_bf16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4, bfloat %arg5) #0 {
|
|
; CI-LABEL: void_func_v32i32_i1_i8_i16_bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:20
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:16
|
|
; CI-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:8
|
|
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:12
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v16, v16
|
|
; CI-NEXT: v_mul_f32_e32 v20, 1.0, v20
|
|
; CI-NEXT: v_and_b32_e32 v0, 1, v17
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v20
|
|
; CI-NEXT: buffer_store_byte v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v18, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v19, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v16, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v1, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v32i32_i1_i8_i16_bf16:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:4
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_ushort v16, off, s[0:3], s32 offset:8
|
|
; VI-NEXT: buffer_load_ushort v17, off, s[0:3], s32 offset:12
|
|
; VI-NEXT: buffer_load_ushort v18, off, s[0:3], s32 offset:16
|
|
; VI-NEXT: buffer_load_ushort v19, off, s[0:3], s32 offset:20
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: v_and_b32_e32 v0, 1, v20
|
|
; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v16, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_short v17, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_short v18, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_short v19, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v32i32_i1_i8_i16_bf16:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:4
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_ushort v16, off, s[0:3], s32 offset:8
|
|
; GFX9-NEXT: buffer_load_ushort v17, off, s[0:3], s32 offset:12
|
|
; GFX9-NEXT: buffer_load_ushort v18, off, s[0:3], s32 offset:16
|
|
; GFX9-NEXT: buffer_load_ushort v19, off, s[0:3], s32 offset:20
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_and_b32_e32 v0, 1, v20
|
|
; GFX9-NEXT: buffer_store_byte v0, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v16, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_short v17, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_short v18, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_short v19, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_i1_i8_i16_bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x5
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_u8 v32, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_u16 v33, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_u16 v34, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_u16 v35, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_u16 v36, off, s32 offset:20
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-NEXT: v_and_b32_e32 v16, 1, v32
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v16, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: buffer_store_b16 v34, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b16 v35, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b16 v36, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
|
|
store volatile i1 %arg1, ptr addrspace(1) undef
|
|
store volatile i8 %arg2, ptr addrspace(1) undef
|
|
store volatile i16 %arg3, ptr addrspace(1) undef
|
|
store volatile half %arg4, ptr addrspace(1) undef
|
|
store volatile bfloat %arg5, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; <32 x i32> followed by <2 x i32> and <2 x float>. The checks expect %arg1 to
; be loaded from the stack at s32 offset:4 and offset:8, and %arg2 at
; offset:12 and offset:16; each is then written back with one 64-bit store.
; All stores are volatile.
define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 {
|
|
; CI-LABEL: void_func_v32i32_v2i32_v2f32:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
|
|
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
|
|
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx2 v[18:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v32i32_v2i32_v2f32:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
|
|
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
|
|
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
|
|
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx2 v[18:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v32i32_v2i32_v2f32:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
|
|
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
|
|
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
|
|
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx2 v[18:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_v2i32_v2f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x4
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b64 v[34:35], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
|
|
store volatile <2 x i32> %arg1, ptr addrspace(1) undef
|
|
store volatile <2 x float> %arg2, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Takes a <32 x i32> followed by four packed 16-bit vector arguments
; (<2 x i16>, <2 x half>, <2 x bfloat>, <4 x bfloat>) and volatile-stores each
; argument, in order, to an undef addrspace(1) pointer.
; In the expected code below, v31 is loaded from the stack at s32 before
; v[28:31] is stored, and the later loads read from positive offsets off s32.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define void @void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2, <2 x bfloat> %arg3, <4 x bfloat> %arg4) #0 {
; CI-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:32
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:36
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:40
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:20
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:24
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:12
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:16
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:8
; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:28
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v15, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v8, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_cvt_f16_f32_e32 v14, v14
; CI-NEXT: v_cvt_f16_f32_e32 v13, v13
; CI-NEXT: v_mul_f32_e32 v9, 1.0, v20
; CI-NEXT: v_mul_f32_e32 v10, 1.0, v16
; CI-NEXT: v_mul_f32_e32 v11, 1.0, v17
; CI-NEXT: v_mul_f32_e32 v16, 1.0, v18
; CI-NEXT: v_mul_f32_e32 v17, 1.0, v19
; CI-NEXT: v_mul_f32_e32 v12, 1.0, v12
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v9
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v10
; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v11
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v16
; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v17
; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v12
; CI-NEXT: buffer_store_short v14, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v13, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v5, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v4, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v3, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v2, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v1, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:20
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:16
; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:12
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dword v19, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dword v20, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:4
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:20
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:16
; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:12
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dword v18, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dword v19, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dword v20, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x5
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:20
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:8
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:12
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:16
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(5)
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(3)
; GFX11-NEXT: buffer_store_b32 v34, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(2)
; GFX11-NEXT: buffer_store_b32 v35, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: buffer_store_b32 v36, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
  store volatile <2 x i16> %arg1, ptr addrspace(1) undef
  store volatile <2 x half> %arg2, ptr addrspace(1) undef
  store volatile <2 x bfloat> %arg3, ptr addrspace(1) undef
  store volatile <4 x bfloat> %arg4, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Takes a <32 x i32> followed by a <2 x i64> and a <2 x double>, and
; volatile-stores each argument, in order, to an undef addrspace(1) pointer.
; In the expected code below, v31 is loaded from the stack at s32 before
; v[28:31] is stored, and the later loads read from offsets 4 through 32
; off s32 before the two trailing 128-bit stores.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 {
; CI-LABEL: void_func_v32i32_v2i64_v2f64:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_v2i64_v2f64:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_v2i64_v2f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v2i64_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x8
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:32
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:28
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:24
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:16
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:12
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:8
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:20
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(8)
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
  store volatile <2 x i64> %arg1, ptr addrspace(1) undef
  store volatile <2 x double> %arg2, ptr addrspace(1) undef
  ret void
}
|
|
|
|
; Takes a <32 x i32> followed by a <4 x i32> and a <4 x float>, and
; volatile-stores each argument, in order, to an undef addrspace(1) pointer.
; In the expected code below, v31 is loaded from the stack at s32 before
; v[28:31] is stored, and the later loads read from offsets 4 through 32
; off s32 before the two trailing 128-bit stores.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 {
; CI-LABEL: void_func_v32i32_v4i32_v4f32:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_v4i32_v4f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_v4i32_v4f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v4i32_v4f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x8
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:32
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:28
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:24
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:20
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(8)
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(4)
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
  store volatile <4 x i32> %arg1, ptr addrspace(1) undef
  store volatile <4 x float> %arg2, ptr addrspace(1) undef
  ret void
}
|
|
|
|
define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 {
|
|
; CI-LABEL: void_func_v32i32_v8i32_v8f32:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
|
|
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
|
|
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
|
|
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
|
|
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
|
|
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
|
|
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
|
|
; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
|
|
; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
|
|
; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:48
|
|
; CI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:44
|
|
; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:40
|
|
; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:36
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v32i32_v8i32_v8f32:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
|
|
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
|
|
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
|
|
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
|
|
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
|
|
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
|
|
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
|
|
; VI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
|
|
; VI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
|
|
; VI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:48
|
|
; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:44
|
|
; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:40
|
|
; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:36
|
|
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v32i32_v8i32_v8f32:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
|
|
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
|
|
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
|
|
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
|
|
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
|
|
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
|
|
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
|
|
; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
|
|
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
|
|
; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:48
|
|
; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:44
|
|
; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:40
|
|
; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:36
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_v8i32_v8f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x10
|
|
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:48
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:44
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:40
|
|
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:64
|
|
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:60
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:56
|
|
; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:32
|
|
; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:28
|
|
; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:24
|
|
; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:20
|
|
; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:52
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:36
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(11)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-NEXT: buffer_store_b128 v[52:55], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: buffer_store_b128 v[48:51], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
|
|
store volatile <8 x i32> %arg1, ptr addrspace(1) undef
|
|
store volatile <8 x float> %arg2, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <32 x i32>, a <16 x i32> and a <16 x float> argument with volatile
; stores to an undef global (addrspace(1)) pointer.
; Per the expected output below, v0-v30 are stored as they arrive, while v31
; and the 32 dwords later stored for %arg1/%arg2 are first loaded through s32
; at offset 0 and offsets 4 through 128 (presumably the stack-passed part of
; the arguments).
define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 {
|
|
; CI-LABEL: void_func_v32i32_v16i32_v16f32:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
|
|
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
|
|
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
|
|
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48
|
|
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44
|
|
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40
|
|
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
|
|
; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
|
|
; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
|
|
; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96
|
|
; CI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92
|
|
; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88
|
|
; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:112
|
|
; CI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:108
|
|
; CI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:104
|
|
; CI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:100
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:128
|
|
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:124
|
|
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:120
|
|
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:116
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
|
|
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
|
|
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
|
|
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:80
|
|
; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:76
|
|
; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:72
|
|
; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:68
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v32i32_v16i32_v16f32:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
|
|
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
|
|
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
|
|
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48
|
|
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44
|
|
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40
|
|
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
|
|
; VI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
|
|
; VI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
|
|
; VI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96
|
|
; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92
|
|
; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88
|
|
; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84
|
|
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:112
|
|
; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:108
|
|
; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:104
|
|
; VI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:100
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:128
|
|
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:124
|
|
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:120
|
|
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:116
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
|
|
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
|
|
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
|
|
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:80
|
|
; VI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:76
|
|
; VI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:72
|
|
; VI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:68
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v32i32_v16i32_v16f32:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
|
|
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
|
|
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
|
|
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48
|
|
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44
|
|
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40
|
|
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
|
|
; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
|
|
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
|
|
; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96
|
|
; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92
|
|
; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88
|
|
; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:112
|
|
; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:108
|
|
; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:104
|
|
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:100
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:128
|
|
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:124
|
|
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:120
|
|
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:116
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
|
|
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
|
|
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
|
|
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:80
|
|
; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:76
|
|
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:72
|
|
; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:68
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_v16i32_v16f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x1f
|
|
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:80
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:76
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:72
|
|
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:96
|
|
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:92
|
|
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:88
|
|
; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:112
|
|
; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:108
|
|
; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:104
|
|
; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:128
|
|
; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:124
|
|
; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:120
|
|
; GFX11-NEXT: scratch_load_b32 v67, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_b32 v66, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_b32 v65, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_b32 v71, off, s32 offset:32
|
|
; GFX11-NEXT: scratch_load_b32 v70, off, s32 offset:28
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v69, off, s32 offset:24
|
|
; GFX11-NEXT: scratch_load_b32 v83, off, s32 offset:48
|
|
; GFX11-NEXT: scratch_load_b32 v82, off, s32 offset:44
|
|
; GFX11-NEXT: scratch_load_b32 v81, off, s32 offset:40
|
|
; GFX11-NEXT: scratch_load_b32 v87, off, s32 offset:64
|
|
; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:60
|
|
; GFX11-NEXT: scratch_load_b32 v85, off, s32 offset:56
|
|
; GFX11-NEXT: scratch_load_b32 v84, off, s32 offset:52
|
|
; GFX11-NEXT: scratch_load_b32 v80, off, s32 offset:36
|
|
; GFX11-NEXT: scratch_load_b32 v68, off, s32 offset:20
|
|
; GFX11-NEXT: scratch_load_b32 v64, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:116
|
|
; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:100
|
|
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:84
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:68
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX11-NEXT: buffer_store_b128 v[84:87], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX11-NEXT: buffer_store_b128 v[80:83], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-NEXT: buffer_store_b128 v[68:71], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-NEXT: buffer_store_b128 v[64:67], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-NEXT: buffer_store_b128 v[52:55], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: buffer_store_b128 v[48:51], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
|
|
store volatile <16 x i32> %arg1, ptr addrspace(1) undef
|
|
store volatile <16 x float> %arg2, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Make sure register v3 isn't wasted when a 3-element vector argument is
; promoted to a 4-element type: the <3 x float> elements are expected in
; v0-v2 and %arg1 in v3 (see the fourth LDS store in each expected output).
|
|
define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
|
|
; CI-LABEL: void_func_v3f32_wasted_reg:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: s_mov_b32 m0, -1
|
|
; CI-NEXT: ds_write_b32 v0, v0
|
|
; CI-NEXT: ds_write_b32 v0, v1
|
|
; CI-NEXT: ds_write_b32 v0, v2
|
|
; CI-NEXT: ds_write_b32 v0, v3
|
|
; CI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v3f32_wasted_reg:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: s_mov_b32 m0, -1
|
|
; VI-NEXT: ds_write_b32 v0, v0
|
|
; VI-NEXT: ds_write_b32 v0, v1
|
|
; VI-NEXT: ds_write_b32 v0, v2
|
|
; VI-NEXT: ds_write_b32 v0, v3
|
|
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v3f32_wasted_reg:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: ds_write_b32 v0, v0
|
|
; GFX9-NEXT: ds_write_b32 v0, v1
|
|
; GFX9-NEXT: ds_write_b32 v0, v2
|
|
; GFX9-NEXT: ds_write_b32 v0, v3
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3f32_wasted_reg:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: ds_store_b32 v0, v0
|
|
; GFX11-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-NEXT: ds_store_b32 v0, v2
|
|
; GFX11-NEXT: ds_store_b32 v0, v3
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%arg0.0 = extractelement <3 x float> %arg0, i32 0
|
|
%arg0.1 = extractelement <3 x float> %arg0, i32 1
|
|
%arg0.2 = extractelement <3 x float> %arg0, i32 2
|
|
store volatile float %arg0.0, ptr addrspace(3) undef
|
|
store volatile float %arg0.1, ptr addrspace(3) undef
|
|
store volatile float %arg0.2, ptr addrspace(3) undef
|
|
store volatile i32 %arg1, ptr addrspace(3) undef
|
|
ret void
|
|
}
|
|
|
|
; Same check as void_func_v3f32_wasted_reg, with a <3 x i32> argument: the
; three vector elements are expected in v0-v2 and %arg1 in v3, each written
; with its own volatile LDS (addrspace(3)) store.
define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
|
|
; CI-LABEL: void_func_v3i32_wasted_reg:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: s_mov_b32 m0, -1
|
|
; CI-NEXT: ds_write_b32 v0, v0
|
|
; CI-NEXT: ds_write_b32 v0, v1
|
|
; CI-NEXT: ds_write_b32 v0, v2
|
|
; CI-NEXT: ds_write_b32 v0, v3
|
|
; CI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v3i32_wasted_reg:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: s_mov_b32 m0, -1
|
|
; VI-NEXT: ds_write_b32 v0, v0
|
|
; VI-NEXT: ds_write_b32 v0, v1
|
|
; VI-NEXT: ds_write_b32 v0, v2
|
|
; VI-NEXT: ds_write_b32 v0, v3
|
|
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v3i32_wasted_reg:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: ds_write_b32 v0, v0
|
|
; GFX9-NEXT: ds_write_b32 v0, v1
|
|
; GFX9-NEXT: ds_write_b32 v0, v2
|
|
; GFX9-NEXT: ds_write_b32 v0, v3
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3i32_wasted_reg:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: ds_store_b32 v0, v0
|
|
; GFX11-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-NEXT: ds_store_b32 v0, v2
|
|
; GFX11-NEXT: ds_store_b32 v0, v3
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%arg0.0 = extractelement <3 x i32> %arg0, i32 0
|
|
%arg0.1 = extractelement <3 x i32> %arg0, i32 1
|
|
%arg0.2 = extractelement <3 x i32> %arg0, i32 2
|
|
store volatile i32 %arg0.0, ptr addrspace(3) undef
|
|
store volatile i32 %arg0.1, ptr addrspace(3) undef
|
|
store volatile i32 %arg0.2, ptr addrspace(3) undef
|
|
store volatile i32 %arg1, ptr addrspace(3) undef
|
|
ret void
|
|
}
|
|
|
|
; Check that llc does not crash on a volatile store of a <16 x i8> argument.
; The expected output writes the value as sixteen separate byte stores, from
; v15 down to v0, each followed by a wait.
|
|
define void @void_func_volatile_v16i8(<16 x i8> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_volatile_v16i8:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_byte v15, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v14, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v13, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v12, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v11, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v10, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v9, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v8, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v7, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v6, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v5, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v4, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v3, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v2, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_volatile_v16i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b8 v15, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v14, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v13, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v12, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v11, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v10, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v9, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v8, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v7, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v6, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v5, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v4, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v3, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v2, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <16 x i8> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Check there is no crash.
|
|
define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
|
|
; CI-LABEL: void_func_v32i32_v16i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:64
|
|
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:48
|
|
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:52
|
|
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:56
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36
|
|
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40
|
|
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44
|
|
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:28
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:32
|
|
; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:20
|
|
; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:24
|
|
; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:16
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12
|
|
; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:8
|
|
; CI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:60
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v16, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v20, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v19, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v18, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v17, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v14, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v13, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v12, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v8, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v15, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v10, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v9, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v11, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v4, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v5, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v6, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v32i32_v16i8:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:64
|
|
; VI-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:48
|
|
; VI-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:52
|
|
; VI-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:56
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:36
|
|
; VI-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:40
|
|
; VI-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:44
|
|
; VI-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:28
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_ubyte v8, off, s[0:3], s32 offset:32
|
|
; VI-NEXT: buffer_load_ubyte v9, off, s[0:3], s32 offset:20
|
|
; VI-NEXT: buffer_load_ubyte v10, off, s[0:3], s32 offset:24
|
|
; VI-NEXT: buffer_load_ubyte v11, off, s[0:3], s32 offset:16
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_ubyte v4, off, s[0:3], s32 offset:12
|
|
; VI-NEXT: buffer_load_ubyte v5, off, s[0:3], s32 offset:8
|
|
; VI-NEXT: buffer_load_ubyte v6, off, s[0:3], s32 offset:4
|
|
; VI-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:60
|
|
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v16, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v20, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v19, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v18, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v17, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v14, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v13, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v12, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v8, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v15, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v10, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v9, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v11, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v4, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v5, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v6, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v32i32_v16i8:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:64
|
|
; GFX9-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:48
|
|
; GFX9-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:52
|
|
; GFX9-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:56
|
|
; GFX9-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:60
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:36
|
|
; GFX9-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:40
|
|
; GFX9-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:44
|
|
; GFX9-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:28
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_ubyte v8, off, s[0:3], s32 offset:32
|
|
; GFX9-NEXT: buffer_load_ubyte v9, off, s[0:3], s32 offset:20
|
|
; GFX9-NEXT: buffer_load_ubyte v10, off, s[0:3], s32 offset:24
|
|
; GFX9-NEXT: buffer_load_ubyte v11, off, s[0:3], s32 offset:16
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_ubyte v4, off, s[0:3], s32 offset:12
|
|
; GFX9-NEXT: buffer_load_ubyte v5, off, s[0:3], s32 offset:8
|
|
; GFX9-NEXT: buffer_load_ubyte v6, off, s[0:3], s32 offset:4
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v16, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v20, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v19, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v18, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v17, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v14, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v13, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v12, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v8, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v15, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v10, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v9, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v11, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v4, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v5, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v6, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_v16i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x10
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_u8 v32, off, s32 offset:64
|
|
; GFX11-NEXT: scratch_load_u8 v33, off, s32 offset:60
|
|
; GFX11-NEXT: scratch_load_u8 v34, off, s32 offset:56
|
|
; GFX11-NEXT: scratch_load_u8 v35, off, s32 offset:52
|
|
; GFX11-NEXT: scratch_load_u8 v36, off, s32 offset:48
|
|
; GFX11-NEXT: scratch_load_u8 v37, off, s32 offset:44
|
|
; GFX11-NEXT: scratch_load_u8 v38, off, s32 offset:40
|
|
; GFX11-NEXT: scratch_load_u8 v39, off, s32 offset:36
|
|
; GFX11-NEXT: scratch_load_u8 v48, off, s32 offset:32
|
|
; GFX11-NEXT: scratch_load_u8 v49, off, s32 offset:28
|
|
; GFX11-NEXT: scratch_load_u8 v50, off, s32 offset:24
|
|
; GFX11-NEXT: scratch_load_u8 v51, off, s32 offset:20
|
|
; GFX11-NEXT: scratch_load_u8 v52, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_u8 v53, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_u8 v54, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_u8 v55, off, s32 offset:4
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(16)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX11-NEXT: buffer_store_b8 v32, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX11-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(13)
|
|
; GFX11-NEXT: buffer_store_b8 v34, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(12)
|
|
; GFX11-NEXT: buffer_store_b8 v35, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(11)
|
|
; GFX11-NEXT: buffer_store_b8 v36, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(10)
|
|
; GFX11-NEXT: buffer_store_b8 v37, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(9)
|
|
; GFX11-NEXT: buffer_store_b8 v38, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX11-NEXT: buffer_store_b8 v39, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX11-NEXT: buffer_store_b8 v48, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX11-NEXT: buffer_store_b8 v49, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-NEXT: buffer_store_b8 v50, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-NEXT: buffer_store_b8 v51, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-NEXT: buffer_store_b8 v52, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: buffer_store_b8 v53, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b8 v54, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b8 v55, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
|
|
store volatile <16 x i8> %arg1, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
|
|
; void_func_bf16: takes a single bfloat argument, stores it through an undef
; ptr addrspace(1), and returns void.
; The assertion lines in the body are machine-generated by
; utils/update_llc_test_checks.py (see the top of this file); regenerate them
; with that script instead of editing them by hand.
; Expected output per check prefix:
;   - CI prefix - v0 is multiplied by 1.0 (v_mul_f32) and shifted right by 16
;     before buffer_store_short.
;   - GFX89 and GFX11 prefixes - v0 is stored as-is (buffer_store_short /
;     buffer_store_b16) with no preceding VALU instructions.
define void @void_func_bf16(bfloat %arg0) #0 {
|
|
; CI-LABEL: void_func_bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store bfloat %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; void_func_v2bf16: takes a <2 x bfloat> argument, stores it through an undef
; ptr addrspace(1), and returns void.
; The assertion lines in the body are machine-generated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
; Expected output per check prefix:
;   - CI prefix - the two elements arrive in v0 and v1; each is multiplied by
;     1.0, v1 is shifted right by 16, and v_alignbit_b32 combines them into v0
;     for a single buffer_store_dword.
;   - GFX89 and GFX11 prefixes - only v0 is stored (buffer_store_dword /
;     buffer_store_b32) with no preceding VALU instructions.
define void @void_func_v2bf16(<2 x bfloat> %arg0) #0 {
|
|
; CI-LABEL: void_func_v2bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_alignbit_b32 v0, v1, v0, 16
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v2bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x bfloat> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; void_func_v3bf16: takes a <3 x bfloat> argument, stores it through an undef
; ptr addrspace(1), and returns void.
; The assertion lines in the body are machine-generated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
; Expected output per check prefix:
;   - CI prefix - the elements arrive in v0, v1 and v2; v0/v1 are combined
;     into v0 with v_alignbit_b32, v2 is multiplied by 1.0 and shifted right by
;     16 into v1, then buffer_store_short v1 is followed by
;     buffer_store_dword v0.
;   - GFX89 prefix - buffer_store_short v1 then buffer_store_dword v0, with no
;     preceding VALU instructions.
;   - GFX11 prefix - the same two stores (b16 then b32), preceded by
;     s_clause 0x1.
define void @void_func_v3bf16(<3 x bfloat> %arg0) #0 {
|
|
; CI-LABEL: void_func_v3bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_alignbit_b32 v0, v1, v0, 16
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v2
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_short v1, off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v3bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x bfloat> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; void_func_v4bf16: takes a <4 x bfloat> argument, stores it through an undef
; ptr addrspace(1), and returns void.
; The assertion lines in the body are machine-generated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
; Expected output per check prefix:
;   - CI prefix - the elements arrive in v0..v3; each is multiplied by 1.0,
;     the odd-numbered registers are shifted right by 16, and two
;     v_alignbit_b32 instructions combine the pairs into v[1:2] for one
;     buffer_store_dwordx2.
;   - GFX89 and GFX11 prefixes - v[0:1] is stored as-is
;     (buffer_store_dwordx2 / buffer_store_b64).
define void @void_func_v4bf16(<4 x bfloat> %arg0) #0 {
|
|
; CI-LABEL: void_func_v4bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_alignbit_b32 v2, v3, v2, 16
|
|
; CI-NEXT: v_alignbit_b32 v1, v1, v0, 16
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v4bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x bfloat> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; void_func_v8bf16: takes an <8 x bfloat> argument, stores it through an undef
; ptr addrspace(1), and returns void.
; The assertion lines in the body are machine-generated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
; Expected output per check prefix:
;   - CI prefix - the elements arrive in v0..v7; each is multiplied by 1.0,
;     the odd-numbered registers are shifted right by 16, and four
;     v_alignbit_b32 instructions combine the pairs into v[3:6] for one
;     buffer_store_dwordx4.
;   - GFX89 and GFX11 prefixes - v[0:3] is stored as-is
;     (buffer_store_dwordx4 / buffer_store_b128).
define void @void_func_v8bf16(<8 x bfloat> %arg0) #0 {
|
|
; CI-LABEL: void_func_v8bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v7, 1.0, v7
|
|
; CI-NEXT: v_mul_f32_e32 v5, 1.0, v5
|
|
; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
|
|
; CI-NEXT: v_mul_f32_e32 v6, 1.0, v6
|
|
; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
|
|
; CI-NEXT: v_mul_f32_e32 v4, 1.0, v4
|
|
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_alignbit_b32 v6, v7, v6, 16
|
|
; CI-NEXT: v_alignbit_b32 v5, v5, v4, 16
|
|
; CI-NEXT: v_alignbit_b32 v4, v3, v2, 16
|
|
; CI-NEXT: v_alignbit_b32 v3, v1, v0, 16
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v8bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x bfloat> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; void_func_v16bf16: takes a <16 x bfloat> argument, stores it through an
; undef ptr addrspace(1), and returns void.
; The assertion lines in the body are machine-generated by
; utils/update_llc_test_checks.py; regenerate them rather than hand-editing.
; Expected output per check prefix:
;   - CI prefix - the elements arrive in v0..v15; each is multiplied by 1.0,
;     the odd-numbered elements are shifted right by 16, and v_alignbit_b32
;     combines the pairs into v[11:14] and v[3:6], which are written by two
;     buffer_store_dwordx4 instructions.
;   - GFX89 prefix - v[4:7] then v[0:3] are stored as-is with two
;     buffer_store_dwordx4 instructions.
;   - GFX11 prefix - the same two stores (buffer_store_b128), preceded by
;     s_clause 0x1.
define void @void_func_v16bf16(<16 x bfloat> %arg0) #0 {
|
|
; CI-LABEL: void_func_v16bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v5, 1.0, v5
|
|
; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
|
|
; CI-NEXT: v_mul_f32_e32 v4, 1.0, v4
|
|
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_alignbit_b32 v5, v5, v4, 16
|
|
; CI-NEXT: v_alignbit_b32 v4, v3, v2, 16
|
|
; CI-NEXT: v_alignbit_b32 v3, v1, v0, 16
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v15
|
|
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v14
|
|
; CI-NEXT: v_alignbit_b32 v14, v0, v1, 16
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v13
|
|
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v12
|
|
; CI-NEXT: v_alignbit_b32 v13, v0, v1, 16
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v11
|
|
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v10
|
|
; CI-NEXT: v_alignbit_b32 v12, v0, v1, 16
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v9
|
|
; CI-NEXT: v_mul_f32_e32 v7, 1.0, v7
|
|
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v8
|
|
; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
|
|
; CI-NEXT: v_mul_f32_e32 v6, 1.0, v6
|
|
; CI-NEXT: v_alignbit_b32 v11, v0, v1, 16
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_alignbit_b32 v6, v7, v6, 16
|
|
; CI-NEXT: buffer_store_dwordx4 v[11:14], off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v16bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x bfloat> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Attribute group #0, referenced by the `#0` suffix on the function
; definitions above; it applies the nounwind function attribute to them.
attributes #0 = { nounwind }
|