Occupancy (i.e., the number of waves per EU) depends, in addition to register usage, on per-workgroup LDS usage as well as on the range of possible workgroup sizes. Mirroring the latter, occupancy should therefore be expressed as a range, since different group sizes generally yield different achievable occupancies. `getOccupancyWithLocalMemSize` currently returns a scalar occupancy based on the maximum workgroup size and LDS usage. With respect to the workgroup size range, this scalar can be the minimum of the range of achievable occupancies, its maximum, or neither of the two. This commit fixes the function by making it compute and return the range of achievable occupancies w.r.t. workgroup size and LDS usage; it also renames it to `getOccupancyWithWorkGroupSizes`, since it is the range of workgroup sizes that produces the range of achievable occupancies. Computing the achievable occupancy range is surprisingly involved. Minimum/maximum workgroup sizes do not necessarily yield maximum/minimum occupancies, i.e., sometimes workgroup sizes inside the range yield the occupancy bounds. The implementation finds these sizes in constant time; heavy documentation explains the rationale behind the sometimes relatively obscure calculations. As a justifying example, consider a target with 10 waves/EU, 4 EUs/CU, and 64-wide waves. Also consider a function with no LDS usage and a flat workgroup size range of [513,1024]. - A group of 513 items requires 9 waves per group. Only 4 groups made up of 9 waves each can fit fully on a CU at any given time, for a total of 36 waves on the CU, or 9 per EU. However, filling as much as possible of the remaining 40-36=4 wave slots without decreasing the number of groups reveals that a larger group of 640 items yields 40 waves on the CU, or 10 per EU. - Similarly, a group of 1024 items requires 16 waves per group. Only 2 groups made up of 16 waves each can fit fully on a CU at any given time, for a total of 32 waves on the CU, or 8 per EU. 
However, removing as many waves as possible from the groups without being able to fit another equal-sized group on the CU reveals that a smaller group of 896 items yields 28 waves on the CU, or 7 per EU. Therefore, the achievable occupancy range for this function is not [8,9], as the group size bounds directly yield, but [7,10]. Naturally, this change causes a lot of test churn, as instruction scheduling is driven by achievable occupancy estimates. In most unit tests the flat workgroup size range is the default [1,1024] which, ignoring potential LDS limitations, would previously produce a scalar occupancy of 8 (derived from 1024) on a lot of targets, whereas we now consider the maximum occupancy to be 10 in such cases. Most tests are updated automatically and checked manually for sanity. I also manually changed some non-automatically generated assertions when necessary. Fixes #118220.
4519 lines
187 KiB
LLVM
4519 lines
187 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
|
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,CI %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,GFX89,VI %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,GFX89,GFX9 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
|
|
|
|
; Takes a plain i1 argument and stores it through an undef addrspace(1)
; pointer. On every target the checks expect v0 to be masked to its low bit
; (v_and_b32 with 1) before a byte-sized buffer store.
define void @void_func_i1(i1 %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_i1:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_i1:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store i1 %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Takes a zeroext i1, widens it to i32, adds 12 and stores the sum.
; The checks expect the add to be emitted as v_or_b32 with 12, applied
; directly to v0 with no masking instruction in front of it.
define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_i1_zeroext:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: v_or_b32_e32 v0, 12, v0
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_i1_zeroext:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_or_b32_e32 v0, 12, v0
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%ext = zext i1 %arg0 to i32
|
|
%add = add i32 %ext, 12
|
|
store i32 %add, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Takes a signext i1, sign-extends it to i32, adds 12 and stores the sum.
; The checks expect a 32-bit add applied directly to v0 with no explicit
; extension instruction. The add mnemonic and its vcc operand differ between
; the four targets, which is why each one has its own check prefix here.
define void @void_func_i1_signext(i1 signext %arg0) #0 {
|
|
; CI-LABEL: void_func_i1_signext:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_i1_signext:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_i1_signext:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_i1_signext:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%ext = sext i1 %arg0 to i32
|
|
%add = add i32 %ext, 12
|
|
store i32 %add, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Branches directly on an i1 argument: block %bb1 (a volatile store of 0)
; runs only when %arg is false, otherwise control goes straight to %bb2.
; The checks expect v0 to be masked to bit 0, compared against 1, and the
; inverted compare result used with s_and_saveexec to guard the store.
; The 64-bit targets use s[4:5]/vcc for the mask; gfx1100 uses s0/vcc_lo.
define void @i1_arg_i1_use(i1 %arg) #0 {
|
|
; CIGFX89-LABEL: i1_arg_i1_use:
|
|
; CIGFX89: ; %bb.0: ; %bb
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; CIGFX89-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
|
|
; CIGFX89-NEXT: s_xor_b64 s[6:7], vcc, -1
|
|
; CIGFX89-NEXT: s_and_saveexec_b64 s[4:5], s[6:7]
|
|
; CIGFX89-NEXT: s_cbranch_execz .LBB3_2
|
|
; CIGFX89-NEXT: ; %bb.1: ; %bb1
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: v_mov_b32_e32 v0, 0
|
|
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: .LBB3_2: ; %bb2
|
|
; CIGFX89-NEXT: s_or_b64 exec, exec, s[4:5]
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: i1_arg_i1_use:
|
|
; GFX11: ; %bb.0: ; %bb
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
|
|
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
|
|
; GFX11-NEXT: s_xor_b32 s1, vcc_lo, -1
|
|
; GFX11-NEXT: s_and_saveexec_b32 s0, s1
|
|
; GFX11-NEXT: s_cbranch_execz .LBB3_2
|
|
; GFX11-NEXT: ; %bb.1: ; %bb1
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: .LBB3_2: ; %bb2
|
|
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
bb:
|
|
br i1 %arg, label %bb2, label %bb1
|
|
|
|
|
bb1:
|
|
store volatile i32 0, ptr addrspace(1) undef
|
|
br label %bb2
|
|
|
|
|
bb2:
|
|
ret void
|
|
}
|
|
|
|
; Stores a plain i8 argument through an undef addrspace(1) pointer.
; The checks expect a byte-sized buffer store of v0 with no mask or
; extension instruction before it.
define void @void_func_i8(i8 %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_i8:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store i8 %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Takes a zeroext i8, widens it to i32, adds 12 and stores the sum.
; The checks expect a 32-bit add applied directly to v0 with no explicit
; extension instruction. Only the add mnemonic differs between targets.
define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
|
|
; CI-LABEL: void_func_i8_zeroext:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_i8_zeroext:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_i8_zeroext:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_i8_zeroext:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%ext = zext i8 %arg0 to i32
|
|
%add = add i32 %ext, 12
|
|
store i32 %add, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Takes a signext i8, sign-extends it to i32, adds 12 and stores the sum.
; The checks expect a 32-bit add applied directly to v0 with no explicit
; extension instruction. Only the add mnemonic differs between targets.
define void @void_func_i8_signext(i8 signext %arg0) #0 {
|
|
; CI-LABEL: void_func_i8_signext:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_i8_signext:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_i8_signext:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_i8_signext:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%ext = sext i8 %arg0 to i32
|
|
%add = add i32 %ext, 12
|
|
store i32 %add, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a plain i16 argument through an undef addrspace(1) pointer.
; The checks expect a 16-bit buffer store of v0 with no mask or extension
; instruction before it.
define void @void_func_i16(i16 %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_i16:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store i16 %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Takes a zeroext i16, widens it to i32, adds 12 and stores the sum.
; The checks expect a 32-bit add applied directly to v0 with no explicit
; extension instruction. Only the add mnemonic differs between targets.
define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
|
|
; CI-LABEL: void_func_i16_zeroext:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_i16_zeroext:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_i16_zeroext:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_i16_zeroext:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%ext = zext i16 %arg0 to i32
|
|
%add = add i32 %ext, 12
|
|
store i32 %add, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Takes a signext i16, sign-extends it to i32, adds 12 and stores the sum.
; The checks expect a 32-bit add applied directly to v0 with no explicit
; extension instruction. Only the add mnemonic differs between targets.
define void @void_func_i16_signext(i16 signext %arg0) #0 {
|
|
; CI-LABEL: void_func_i16_signext:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_i16_signext:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_i16_signext:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_i16_signext:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%ext = sext i16 %arg0 to i32
|
|
%add = add i32 %ext, 12
|
|
store i32 %add, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores an i32 argument through an undef addrspace(1) pointer.
; The checks expect a single 32-bit buffer store of v0.
define void @void_func_i32(i32 %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store i32 %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores an i64 argument through an undef addrspace(1) pointer.
; The checks expect the value in the register pair v[0:1] and a single
; 64-bit buffer store.
define void @void_func_i64(i64 %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store i64 %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a half argument through an undef addrspace(1) pointer.
; For the hawaii run the checks expect a v_cvt_f16_f32 on v0 before the
; 16-bit store; the fiji, gfx900 and gfx1100 runs store v0 directly.
define void @void_func_f16(half %arg0) #0 {
|
|
; CI-LABEL: void_func_f16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_f16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store half %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a float argument through an undef addrspace(1) pointer.
; The checks expect a single 32-bit buffer store of v0.
define void @void_func_f32(float %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store float %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a double argument through an undef addrspace(1) pointer.
; The checks expect the value in the register pair v[0:1] and a single
; 64-bit buffer store.
define void @void_func_f64(double %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_f64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store double %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x i32> argument through an undef addrspace(1) pointer.
; The checks expect the two elements in v[0:1] and a single 64-bit store.
define void @void_func_v2i32(<2 x i32> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v2i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x i32> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <3 x i32> argument through an undef addrspace(1) pointer.
; The checks expect the three elements in v[0:2] and a single 96-bit store
; (dwordx3 / b96) on every target.
define void @void_func_v3i32(<3 x i32> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v3i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x i32> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x i32> argument through an undef addrspace(1) pointer.
; The checks expect the four elements in v[0:3] and a single 128-bit store.
define void @void_func_v4i32(<4 x i32> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v4i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x i32> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <5 x i32> argument through an undef addrspace(1) pointer.
; The checks expect the store to be split in two: a 32-bit store of v4
; followed by a 128-bit store of v[0:3]. On gfx1100 the two stores are
; preceded by s_clause 0x1.
define void @void_func_v5i32(<5 x i32> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v5i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dword v4, off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v5i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b32 v4, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <5 x i32> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores an <8 x i32> argument through an undef addrspace(1) pointer.
; The checks expect two 128-bit stores, v[4:7] first and then v[0:3].
; On gfx1100 the two stores are preceded by s_clause 0x1.
define void @void_func_v8i32(<8 x i32> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v8i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x i32> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <16 x i32> argument through an undef addrspace(1) pointer.
; The checks expect four 128-bit stores covering v[0:15], emitted from the
; highest register quad down to v[0:3]. On gfx1100 they are preceded by
; s_clause 0x3.
define void @void_func_v16i32(<16 x i32> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v16i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x i32> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <32 x i32> argument through an undef addrspace(1) pointer.
; The checks expect v31 to be loaded from memory addressed by s32 before it
; is stored (buffer_load_dword on the older targets, scratch_load_b32 on
; gfx1100), i.e. the last element does not arrive in a register.
; NOTE(review): s32 is presumably the stack pointer here - confirm against
; the AMDGPU calling convention.
; The store of v[28:31] is preceded by an s_waitcnt vmcnt so that the load
; of v31 has completed; the remaining seven 128-bit stores need no load.
define void @void_func_v32i32(<32 x i32> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v32i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(6)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <32 x i32> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; 1 over register limit
|
|
; Stores a <33 x i32> argument through an undef addrspace(1) pointer.
; The checks expect two elements to be loaded from memory addressed by s32:
; v31 from offset 0 and one further dword from offset 4. The older targets
; load that dword into v20 after v[20:23] has already been stored, while
; gfx1100 loads it into v32.
; NOTE(review): s32 is presumably the stack pointer here - confirm against
; the AMDGPU calling convention.
; Eight 128-bit stores and one final 32-bit store follow, with s_waitcnt
; vmcnt placed before each store that consumes a loaded register. The gfx900
; checks additionally expect an s_nop 0 after each of the two loads.
define void @void_func_v33i32(<33 x i32> %arg0) #0 {
|
|
; CI-LABEL: void_func_v33i32:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(6)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(6)
|
|
; CI-NEXT: buffer_store_dword v20, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v33i32:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(6)
|
|
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(6)
|
|
; VI-NEXT: buffer_store_dword v20, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v33i32:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX9-NEXT: buffer_store_dword v20, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v33i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x5
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b32 v32, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <33 x i32> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Test case: a <2 x i64> argument is stored whole to an undef addrspace(1)
; pointer. Both check groups expect exactly one 128-bit buffer store of v[0:3].
; The CIGFX89 group additionally expects an s_waitcnt vmcnt(0) between the
; store and the return; the GFX11 group expects the return right after the store.
; NOTE(review): the RUN lines that select each prefix are outside this view.
define void @void_func_v2i64(<2 x i64> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v2i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x i64> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Test case: a <3 x i64> argument is stored to an undef addrspace(1) pointer.
; Both check groups expect the store to be split in two: a 64-bit store of
; v[4:5] first, then a 128-bit store of v[0:3]. The GFX11 group expects the two
; stores to be preceded by s_clause 0x1.
define void @void_func_v3i64(<3 x i64> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v3i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x i64> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Test case: a <4 x i64> argument is stored to an undef addrspace(1) pointer.
; Both check groups expect two 128-bit stores, v[4:7] first and v[0:3] second.
; The GFX11 group expects the pair to be preceded by s_clause 0x1.
define void @void_func_v4i64(<4 x i64> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v4i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x i64> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Test case: a <5 x i64> argument is stored to an undef addrspace(1) pointer.
; Both check groups expect three stores in this order: 128 bits from v[4:7],
; 128 bits from v[0:3], then 64 bits from v[8:9]. The GFX11 group expects the
; three stores to be preceded by s_clause 0x2.
define void @void_func_v5i64(<5 x i64> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v5i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[8:9], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v5i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x2
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b64 v[8:9], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <5 x i64> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Test case: a <8 x i64> argument is stored to an undef addrspace(1) pointer.
; Both check groups expect four 128-bit stores issued from the highest
; registers down: v[12:15], v[8:11], v[4:7], v[0:3]. The GFX11 group expects
; the four stores to be preceded by s_clause 0x3.
define void @void_func_v8i64(<8 x i64> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v8i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x i64> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Test case: a <16 x i64> argument (32 dwords) is stored to an undef
; addrspace(1) pointer. Both check groups expect v31 to be loaded from memory
; addressed through s32 before the stores begin (presumably the stack slot of
; the last argument dword - confirm against the calling convention).
; The store of v[28:31] is expected only after an s_waitcnt on the vector
; memory counter: vmcnt(6) in the CIGFX89 group, vmcnt(0) in the GFX11 group.
; The GFX11 group expects the eight stores in two batches of four, each
; preceded by s_clause 0x3, with the wait between the batches.
define void @void_func_v16i64(<16 x i64> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v16i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(6)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x i64> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Test case: a <2 x i8> argument is stored to a null addrspace(1) pointer.
; Every check group expects the two bytes to be merged into one 16-bit value
; (v1 shifted left by 8, OR-ed with the low byte of v0) and written with a
; single 16-bit buffer store. The first two registers of the buffer operand
; are expected to be zero (an s_mov_b32 of 0 plus a register copy).
; The groups differ in how the merge is done: the CI group uses a 32-bit shift
; and an explicit 0xff mask, the GFX89 group uses a 16-bit shift and an SDWA OR
; selecting BYTE_0, and the GFX11 group uses a 16-bit shift and an explicit mask.
define void @void_func_v2i8(<2 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v2i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: s_mov_b32 s4, 0
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_mov_b32 s5, s4
|
|
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v2i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: s_mov_b32 s4, 0
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: s_mov_b32 s5, s4
|
|
; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-NEXT: s_mov_b32 s0, 0
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_mov_b32 s1, s0
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Test case: a <2 x i16> argument is stored to an undef addrspace(1) pointer.
; The CI group expects v0 and v1 to be packed into one dword (v0 masked to its
; low 16 bits, v1 shifted left by 16, then OR) before a single dword store.
; The GFX89 and GFX11 groups expect v0 to be stored directly with no packing
; instructions.
define void @void_func_v2i16(<2 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v2i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v2i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x i16> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Test case: a <3 x i8> argument is stored to a null addrspace(1) pointer.
; Every check group expects two stores. First a byte store of v2, issued while
; the first register of the buffer operand holds 2. Then that register is
; overwritten with the zero held in the second register, and the packed v0/v1
; pair (v1 shifted left by 8, OR-ed with the low byte of v0) is written with a
; 16-bit store.
define void @void_func_v3i8(<3 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v3i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: s_mov_b32 s5, 0
|
|
; CI-NEXT: s_mov_b32 s4, 2
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: buffer_store_byte v2, off, s[4:7], 0
|
|
; CI-NEXT: s_mov_b32 s4, s5
|
|
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v3i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: s_mov_b32 s5, 0
|
|
; GFX89-NEXT: s_mov_b32 s4, 2
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: buffer_store_byte v2, off, s[4:7], 0
|
|
; GFX89-NEXT: s_mov_b32 s4, s5
|
|
; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-NEXT: s_mov_b32 s1, 0
|
|
; GFX11-NEXT: s_mov_b32 s0, 2
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: buffer_store_b8 v2, off, s[0:3], 0
|
|
; GFX11-NEXT: s_mov_b32 s0, s1
|
|
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Test case: a <4 x i8> argument is stored to a null addrspace(1) pointer.
; Every check group expects the four bytes in v0..v3 to be packed into a single
; dword in v0 and written with one dword store, with the first two registers of
; the buffer operand set to zero.
; The packing sequence differs per group: the CI group uses 32-bit mask, shift
; and OR instructions; the GFX89 group uses 16-bit shifts with SDWA ORs; the
; GFX11 group uses 16-bit shifts with 32-bit mask/OR instructions and expects
; s_delay_alu annotations between the dependent VALU instructions.
define void @void_func_v4i8(<4 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v4i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: v_or_b32_e32 v2, v3, v2
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: s_mov_b32 s4, 0
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v2
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_mov_b32 s5, s4
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v4i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
|
|
; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s4, 0
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: s_mov_b32 s5, s4
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
|
|
; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; GFX11-NEXT: s_mov_b32 s0, 0
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-NEXT: s_mov_b32 s1, s0
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Test case: a <5 x i8> argument is stored to a null addrspace(1) pointer.
; Every check group expects two stores. First a byte store of v4, issued while
; the first register of the buffer operand holds 4. Then that register is
; overwritten with the zero held in the second register, and the dword packed
; from v0..v3 is written with a dword store.
; In the GFX11 group the byte store is expected in the middle of the packing
; sequence rather than after it.
define void @void_func_v5i8(<5 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v5i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: v_or_b32_e32 v2, v3, v2
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: s_mov_b32 s5, 0
|
|
; CI-NEXT: s_mov_b32 s4, 4
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v2
|
|
; CI-NEXT: buffer_store_byte v4, off, s[4:7], 0
|
|
; CI-NEXT: s_mov_b32 s4, s5
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v5i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
|
|
; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s5, 0
|
|
; GFX89-NEXT: s_mov_b32 s4, 4
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: buffer_store_byte v4, off, s[4:7], 0
|
|
; GFX89-NEXT: s_mov_b32 s4, s5
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v5i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
|
|
; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; GFX11-NEXT: s_mov_b32 s1, 0
|
|
; GFX11-NEXT: s_mov_b32 s0, 4
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-NEXT: buffer_store_b8 v4, off, s[0:3], 0
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-NEXT: s_mov_b32 s0, s1
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <5 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Test case: a <8 x i8> argument is stored to a null addrspace(1) pointer.
; Every check group expects the eight bytes in v0..v7 to be packed into two
; dwords and written with a single 64-bit store, with the first two registers
; of the buffer operand set to zero.
; The CI and GFX89 groups expect the packed pair in v[3:4]; the GFX11 group
; expects it in v[0:1].
define void @void_func_v8i8(<8 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v8i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v7, 24, v7
|
|
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: v_or_b32_e32 v6, v7, v6
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; CI-NEXT: v_or_b32_e32 v2, v3, v2
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: s_mov_b32 s4, 0
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v6
|
|
; CI-NEXT: v_or_b32_e32 v3, v0, v2
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_mov_b32 s5, s4
|
|
; CI-NEXT: buffer_store_dwordx2 v[3:4], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v8i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v7
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
|
|
; GFX89-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s4, 0
|
|
; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: s_mov_b32 s5, s4
|
|
; GFX89-NEXT: buffer_store_dwordx2 v[3:4], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5
|
|
; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7
|
|
; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
|
|
; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; GFX11-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; GFX11-NEXT: v_or_b32_e32 v5, v6, v7
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: s_mov_b32 s0, 0
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v4
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v5
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 16, v1
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-NEXT: s_mov_b32 s1, s0
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v4
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Test case: a <16 x i8> argument is stored to a null addrspace(1) pointer.
; Every check group expects the sixteen bytes in v0..v15 to be packed into four
; dwords and written with a single 128-bit store, with the first two registers
; of the buffer operand set to zero.
; The CI and GFX89 groups expect the packed value in v[9:12]; the GFX11 group
; expects it in v[0:3].
define void @void_func_v16i8(<16 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v16i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v13
|
|
; CI-NEXT: v_and_b32_e32 v12, 0xff, v12
|
|
; CI-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
; CI-NEXT: v_lshlrev_b32_e32 v9, 8, v9
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; CI-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v15, 24, v15
|
|
; CI-NEXT: v_lshlrev_b32_e32 v14, 16, v14
|
|
; CI-NEXT: v_or_b32_e32 v12, v12, v13
|
|
; CI-NEXT: v_lshlrev_b32_e32 v11, 24, v11
|
|
; CI-NEXT: v_lshlrev_b32_e32 v10, 16, v10
|
|
; CI-NEXT: v_or_b32_e32 v8, v8, v9
|
|
; CI-NEXT: v_lshlrev_b32_e32 v7, 24, v7
|
|
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: v_or_b32_e32 v14, v15, v14
|
|
; CI-NEXT: v_and_b32_e32 v12, 0xffff, v12
|
|
; CI-NEXT: v_or_b32_e32 v10, v11, v10
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; CI-NEXT: v_or_b32_e32 v6, v7, v6
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; CI-NEXT: v_or_b32_e32 v2, v3, v2
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: s_mov_b32 s4, 0
|
|
; CI-NEXT: v_or_b32_e32 v12, v12, v14
|
|
; CI-NEXT: v_or_b32_e32 v11, v8, v10
|
|
; CI-NEXT: v_or_b32_e32 v10, v4, v6
|
|
; CI-NEXT: v_or_b32_e32 v9, v0, v2
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_mov_b32 s5, s4
|
|
; CI-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v16i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v13
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v9
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v15
|
|
; GFX89-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v11
|
|
; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v7
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
|
|
; GFX89-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s4, 0
|
|
; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v11, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v10, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v9, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: s_mov_b32 s5, s4
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_lshlrev_b16 v13, 8, v13
|
|
; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12
|
|
; GFX11-NEXT: v_lshlrev_b16 v15, 8, v15
|
|
; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
; GFX11-NEXT: v_lshlrev_b16 v9, 8, v9
|
|
; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; GFX11-NEXT: v_lshlrev_b16 v11, 8, v11
|
|
; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5
|
|
; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7
|
|
; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
|
|
; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; GFX11-NEXT: v_or_b32_e32 v12, v12, v13
|
|
; GFX11-NEXT: v_or_b32_e32 v13, v14, v15
|
|
; GFX11-NEXT: v_or_b32_e32 v8, v8, v9
|
|
; GFX11-NEXT: v_or_b32_e32 v10, v10, v11
|
|
; GFX11-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; GFX11-NEXT: v_or_b32_e32 v5, v6, v7
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-NEXT: v_and_b32_e32 v9, 0xffff, v12
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v13
|
|
; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v10
|
|
; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v6, 16, v1
|
|
; GFX11-NEXT: v_or_b32_e32 v3, v9, v12
|
|
; GFX11-NEXT: v_or_b32_e32 v2, v8, v2
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v4, v5
|
|
; GFX11-NEXT: s_mov_b32 s0, 0
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v6
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_mov_b32 s1, s0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
define void @void_func_v32i8(<32 x i8> %arg0) #0 {
|
|
; CI-LABEL: void_func_v32i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32
|
|
; CI-NEXT: v_lshlrev_b32_e32 v9, 8, v9
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v13
|
|
; CI-NEXT: v_and_b32_e32 v12, 0xff, v12
|
|
; CI-NEXT: v_or_b32_e32 v8, v8, v9
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; CI-NEXT: v_and_b32_e32 v9, 0xff, v14
|
|
; CI-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
; CI-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; CI-NEXT: v_or_b32_e32 v12, v12, v13
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 24, v15
|
|
; CI-NEXT: v_lshlrev_b32_e32 v11, 24, v11
|
|
; CI-NEXT: v_lshlrev_b32_e32 v7, 24, v7
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v9, 16, v9
|
|
; CI-NEXT: v_lshlrev_b32_e32 v10, 16, v10
|
|
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v29
|
|
; CI-NEXT: v_and_b32_e32 v14, 0xff, v28
|
|
; CI-NEXT: v_and_b32_e32 v26, 0xff, v26
|
|
; CI-NEXT: v_lshlrev_b32_e32 v25, 8, v25
|
|
; CI-NEXT: v_and_b32_e32 v24, 0xff, v24
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
|
; CI-NEXT: v_or_b32_e32 v1, v1, v9
|
|
; CI-NEXT: v_or_b32_e32 v9, v11, v10
|
|
; CI-NEXT: v_and_b32_e32 v10, 0xffff, v12
|
|
; CI-NEXT: v_or_b32_e32 v6, v7, v6
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; CI-NEXT: v_lshlrev_b32_e32 v15, 24, v27
|
|
; CI-NEXT: v_and_b32_e32 v27, 0xff, v30
|
|
; CI-NEXT: v_or_b32_e32 v13, v14, v13
|
|
; CI-NEXT: v_lshlrev_b32_e32 v14, 16, v26
|
|
; CI-NEXT: v_or_b32_e32 v7, v3, v2
|
|
; CI-NEXT: v_or_b32_e32 v3, v10, v1
|
|
; CI-NEXT: v_or_b32_e32 v1, v4, v6
|
|
; CI-NEXT: v_lshlrev_b32_e32 v26, 16, v27
|
|
; CI-NEXT: v_or_b32_e32 v11, v15, v14
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_and_b32_e32 v12, 0xffff, v13
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v7
|
|
; CI-NEXT: v_or_b32_e32 v2, v8, v9
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xff, v20
|
|
; CI-NEXT: v_and_b32_e32 v9, 0xff, v16
|
|
; CI-NEXT: s_mov_b32 s5, 0
|
|
; CI-NEXT: s_mov_b32 s4, 16
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v4, 24, v5
|
|
; CI-NEXT: v_or_b32_e32 v5, v24, v25
|
|
; CI-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v26
|
|
; CI-NEXT: v_or_b32_e32 v6, v5, v11
|
|
; CI-NEXT: v_and_b32_e32 v5, 0xff, v22
|
|
; CI-NEXT: v_or_b32_e32 v7, v12, v4
|
|
; CI-NEXT: v_lshlrev_b32_e32 v4, 24, v23
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v21
|
|
; CI-NEXT: v_or_b32_e32 v5, v8, v5
|
|
; CI-NEXT: v_and_b32_e32 v5, 0xffff, v5
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xff, v18
|
|
; CI-NEXT: v_or_b32_e32 v5, v5, v4
|
|
; CI-NEXT: v_lshlrev_b32_e32 v4, 24, v19
|
|
; CI-NEXT: v_lshlrev_b32_e32 v8, 16, v8
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v8
|
|
; CI-NEXT: v_lshlrev_b32_e32 v8, 8, v17
|
|
; CI-NEXT: v_or_b32_e32 v8, v9, v8
|
|
; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; CI-NEXT: v_or_b32_e32 v4, v8, v4
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_mov_b32 s4, s5
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v32i8:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v9
|
|
; GFX89-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v11
|
|
; GFX89-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: buffer_load_ubyte v10, off, s[0:3], s32
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v13
|
|
; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v15
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v7, 8, v7
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v3, 8, v3
|
|
; GFX89-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v11, 8, v29
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v14, 8, v25
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v15, 8, v27
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v21, 8, v21
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v23, 8, v23
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v17, 8, v17
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v19, 8, v19
|
|
; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v5, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v6, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v2, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v7, v28, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v11, v24, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v14, v26, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v15, v20, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v20, v22, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v16, v16, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v17, v18, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_mov_b32 s5, 0
|
|
; GFX89-NEXT: s_mov_b32 s4, 16
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v6, v11, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v5, v15, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v4, v16, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v3, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: v_lshlrev_b16_e32 v8, 8, v10
|
|
; GFX89-NEXT: v_or_b32_sdwa v8, v30, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
|
; GFX89-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX89-NEXT: s_mov_b32 s4, s5
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: scratch_load_u8 v31, off, s32
|
|
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
|
|
; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
|
|
; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
|
|
; GFX11-NEXT: v_lshlrev_b16 v9, 8, v9
|
|
; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8
|
|
; GFX11-NEXT: v_lshlrev_b16 v11, 8, v11
|
|
; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10
|
|
; GFX11-NEXT: v_lshlrev_b16 v17, 8, v17
|
|
; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v16
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
|
|
; GFX11-NEXT: v_lshlrev_b16 v13, 8, v13
|
|
; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12
|
|
; GFX11-NEXT: v_lshlrev_b16 v15, 8, v15
|
|
; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14
|
|
; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5
|
|
; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4
|
|
; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7
|
|
; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
|
|
; GFX11-NEXT: v_lshlrev_b16 v29, 8, v29
|
|
; GFX11-NEXT: v_and_b32_e32 v28, 0xff, v28
|
|
; GFX11-NEXT: v_and_b32_e32 v30, 0xff, v30
|
|
; GFX11-NEXT: v_lshlrev_b16 v25, 8, v25
|
|
; GFX11-NEXT: v_and_b32_e32 v24, 0xff, v24
|
|
; GFX11-NEXT: v_lshlrev_b16 v27, 8, v27
|
|
; GFX11-NEXT: v_and_b32_e32 v26, 0xff, v26
|
|
; GFX11-NEXT: v_lshlrev_b16 v21, 8, v21
|
|
; GFX11-NEXT: v_and_b32_e32 v20, 0xff, v20
|
|
; GFX11-NEXT: v_lshlrev_b16 v23, 8, v23
|
|
; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22
|
|
; GFX11-NEXT: v_lshlrev_b16 v19, 8, v19
|
|
; GFX11-NEXT: v_and_b32_e32 v18, 0xff, v18
|
|
; GFX11-NEXT: v_or_b32_e32 v8, v8, v9
|
|
; GFX11-NEXT: v_or_b32_e32 v9, v10, v11
|
|
; GFX11-NEXT: v_or_b32_e32 v11, v16, v17
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v17, 16, v1
|
|
; GFX11-NEXT: v_or_b32_e32 v12, v12, v13
|
|
; GFX11-NEXT: v_or_b32_e32 v13, v14, v15
|
|
; GFX11-NEXT: v_or_b32_e32 v4, v4, v5
|
|
; GFX11-NEXT: v_or_b32_e32 v5, v6, v7
|
|
; GFX11-NEXT: v_or_b32_e32 v2, v28, v29
|
|
; GFX11-NEXT: v_or_b32_e32 v3, v24, v25
|
|
; GFX11-NEXT: v_or_b32_e32 v6, v26, v27
|
|
; GFX11-NEXT: v_or_b32_e32 v7, v20, v21
|
|
; GFX11-NEXT: v_or_b32_e32 v10, v22, v23
|
|
; GFX11-NEXT: v_or_b32_e32 v14, v18, v19
|
|
; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v4
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v16, 16, v5
|
|
; GFX11-NEXT: v_and_b32_e32 v18, 0xffff, v2
|
|
; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v3
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v5, 16, v6
|
|
; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v7
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v10, 16, v10
|
|
; GFX11-NEXT: v_and_b32_e32 v11, 0xffff, v11
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v14, 16, v14
|
|
; GFX11-NEXT: v_and_b32_e32 v12, 0xffff, v12
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v13, 16, v13
|
|
; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v8
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v9, 16, v9
|
|
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; GFX11-NEXT: v_or_b32_e32 v6, v4, v5
|
|
; GFX11-NEXT: v_or_b32_e32 v5, v7, v10
|
|
; GFX11-NEXT: v_or_b32_e32 v4, v11, v14
|
|
; GFX11-NEXT: v_or_b32_e32 v3, v12, v13
|
|
; GFX11-NEXT: v_or_b32_e32 v2, v8, v9
|
|
; GFX11-NEXT: v_or_b32_e32 v0, v0, v17
|
|
; GFX11-NEXT: s_mov_b32 s1, 0
|
|
; GFX11-NEXT: s_mov_b32 s0, 16
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v31
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v30, v1
|
|
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-NEXT: v_or_b32_e32 v7, v18, v1
|
|
; GFX11-NEXT: v_or_b32_e32 v1, v15, v16
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: s_mov_b32 s0, s1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <32 x i8> %arg0, ptr addrspace(1) null
|
|
ret void
|
|
}
|
|
|
|
; Stores a <3 x i16> function argument to an undef global (addrspace 1) pointer.
; In the CI checks the three elements are read from v0, v1 and v2; v0 and v1 are
; merged with a shift/and/or sequence and written as one dword, v2 as a short.
; In the GFX89 and GFX11 checks v0 (dword) and v1 (short) are stored directly
; with no repacking instructions.
define void @void_func_v3i16(<3 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v3i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: buffer_store_short v2, off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v3i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x i16> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x i16> function argument to an undef global (addrspace 1) pointer.
; In the CI checks the four elements are read from v0..v3 and merged pairwise
; (shift/and/or) into v[1:2] before a single dwordx2 store. In the GFX89 and
; GFX11 checks v[0:1] is stored directly with no repacking instructions.
define void @void_func_v4i16(<4 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v4i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_or_b32_e32 v2, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v1, v0, v1
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v4i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x i16> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <5 x i16> function argument to an undef global (addrspace 1) pointer.
; In the CI checks elements are read from v0..v4; v0..v3 are merged pairwise
; into v[1:2] for a dwordx2 store and v4 is written as a short. In the GFX89
; and GFX11 checks v2 (short) and v[0:1] (64-bit) are stored directly; the
; GFX11 checks group both stores under s_clause 0x1.
define void @void_func_v5i16(<5 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v5i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v2, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v1, v0, v1
|
|
; CI-NEXT: buffer_store_short v4, off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v5i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_short v2, off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v5i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b16 v2, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <5 x i16> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores an <8 x i16> function argument to an undef global (addrspace 1) pointer.
; In the CI checks the eight elements are read from v0..v7 and merged pairwise
; (shift/and/or) into v[3:6] before a single dwordx4 store. In the GFX89 and
; GFX11 checks v[0:3] is stored directly with no repacking instructions.
define void @void_func_v8i16(<8 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v8i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v7, 16, v7
|
|
; CI-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_or_b32_e32 v6, v6, v7
|
|
; CI-NEXT: v_or_b32_e32 v5, v4, v5
|
|
; CI-NEXT: v_or_b32_e32 v4, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v3, v0, v1
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v8i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x i16> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <16 x i16> function argument to an undef global (addrspace 1) pointer.
; In the CI checks the sixteen elements are read from v0..v15 and merged
; pairwise (shift/and/or) into v[3:6] and v[11:14], each written with one
; dwordx4 store. In the GFX89 and GFX11 checks v[4:7] and v[0:3] are stored
; directly; the GFX11 checks group both stores under s_clause 0x1.
define void @void_func_v16i16(<16 x i16> %arg0) #0 {
|
|
; CI-LABEL: void_func_v16i16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
|
|
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
|
|
; CI-NEXT: v_or_b32_e32 v5, v4, v5
|
|
; CI-NEXT: v_or_b32_e32 v4, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v3, v0, v1
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v15
|
|
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v14
|
|
; CI-NEXT: v_or_b32_e32 v14, v1, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v13
|
|
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v12
|
|
; CI-NEXT: v_or_b32_e32 v13, v1, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v11
|
|
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v10
|
|
; CI-NEXT: v_or_b32_e32 v12, v1, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v9
|
|
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v8
|
|
; CI-NEXT: v_lshlrev_b32_e32 v7, 16, v7
|
|
; CI-NEXT: v_and_b32_e32 v6, 0xffff, v6
|
|
; CI-NEXT: v_or_b32_e32 v11, v1, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v6, v6, v7
|
|
; CI-NEXT: buffer_store_dwordx4 v[11:14], off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v16i16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16i16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x i16> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Extracts both elements of a <2 x i24> argument, adds them as i24 and stores
; the i24 sum to an undef global (addrspace 1) pointer. Every check block adds
; v0 and v1, shifts the sum right by 16 to obtain the top byte, then emits a
; byte store of that value plus a short store of the sum. This function has
; separate CI, VI, GFX9 and GFX11 check blocks; the visible difference between
; the first three is the add mnemonic and its operands (v_add_i32 with vcc,
; v_add_u32 with vcc, v_add_u32 without vcc), while GFX11 uses v_add_nc_u32.
define void @void_func_v2i24(<2 x i24> %arg0) #0 {
|
|
; CI-LABEL: void_func_v2i24:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_add_i32_e32 v0, vcc, v0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v2i24:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
|
|
; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v2i24:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
|
|
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; GFX9-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2i24:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%elt0 = extractelement <2 x i24> %arg0, i32 0
|
|
%elt1 = extractelement <2 x i24> %arg0, i32 1
|
|
%add = add i24 %elt0, %elt1
|
|
store i24 %add, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x float> function argument to an undef global (addrspace 1)
; pointer. Both check blocks expect a single 64-bit store of v[0:1] with no
; preceding data movement (buffer_store_dwordx2 in the shared CIGFX89 checks,
; buffer_store_b64 in the GFX11 checks).
define void @void_func_v2f32(<2 x float> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v2f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x float> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <3 x float> function argument to an undef global (addrspace 1)
; pointer. Both check blocks expect a single 96-bit store of v[0:2] with no
; preceding data movement (buffer_store_dwordx3 in the shared CIGFX89 checks,
; buffer_store_b96 in the GFX11 checks).
define void @void_func_v3f32(<3 x float> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v3f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x float> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x float> function argument to an undef global (addrspace 1)
; pointer. Both check blocks expect a single 128-bit store of v[0:3] with no
; preceding data movement (buffer_store_dwordx4 in the shared CIGFX89 checks,
; buffer_store_b128 in the GFX11 checks).
define void @void_func_v4f32(<4 x float> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v4f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x float> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores an <8 x float> function argument to an undef global (addrspace 1)
; pointer. Both check blocks expect two 128-bit stores, v[4:7] first and then
; v[0:3], with no preceding data movement; the GFX11 checks group them under
; s_clause 0x1.
define void @void_func_v8f32(<8 x float> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v8f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x float> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <16 x float> function argument to an undef global (addrspace 1)
; pointer. Both check blocks expect four 128-bit stores in descending register
; order (v[12:15], v[8:11], v[4:7], v[0:3]) with no preceding data movement;
; the GFX11 checks group them under s_clause 0x3.
define void @void_func_v16f32(<16 x float> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v16f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x float> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x double> function argument to an undef global (addrspace 1)
; pointer. Both check blocks expect a single 128-bit store of v[0:3] with no
; preceding data movement.
define void @void_func_v2f64(<2 x double> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v2f64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x double> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <3 x double> function argument to an undef global (addrspace 1)
; pointer. Both check blocks expect a 64-bit store of v[4:5] followed by a
; 128-bit store of v[0:3], with no preceding data movement; the GFX11 checks
; group them under s_clause 0x1.
define void @void_func_v3f64(<3 x double> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v3f64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x double> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x double> function argument to an undef global (addrspace 1)
; pointer. Both check blocks expect two 128-bit stores, v[4:7] first and then
; v[0:3], with no preceding data movement; the GFX11 checks group them under
; s_clause 0x1.
define void @void_func_v4f64(<4 x double> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v4f64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x double> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores an <8 x double> function argument to an undef global (addrspace 1)
; pointer. Both check blocks expect four 128-bit stores in descending register
; order (v[12:15], v[8:11], v[4:7], v[0:3]) with no preceding data movement;
; the GFX11 checks group them under s_clause 0x3.
define void @void_func_v8f64(<8 x double> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v8f64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x double> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <16 x double> function argument to an undef global (addrspace 1)
; pointer. Both check blocks first load one dword into v31 from memory addressed
; through s32 (presumably the stack slot holding the part of the argument that
; did not fit in v0..v30 - confirm against the calling convention), then emit
; eight 128-bit stores. The store of v[28:31] is placed after an s_waitcnt so
; that the loaded v31 is available (vmcnt(6) in the CIGFX89 checks, vmcnt(0) in
; the GFX11 checks); the GFX11 checks split the stores into two s_clause 0x3
; groups around that wait.
define void @void_func_v16f64(<16 x double> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_v16f64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(6)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x double> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x half> function argument to an undef global (addrspace 1)
; pointer. In the CI checks v0 and v1 each go through v_cvt_f16_f32 and are then
; merged (shift/or) into one dword before the store. In the GFX89 and GFX11
; checks v0 is stored directly as a dword with no conversion or repacking.
define void @void_func_v2f16(<2 x half> %arg0) #0 {
|
|
; CI-LABEL: void_func_v2f16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v2f16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x half> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; FIXME: Different ABI if f16 is legal
|
|
; Stores a <3 x half> function argument to an undef global (addrspace 1)
; pointer. In the CI checks v0, v1 and v2 each go through v_cvt_f16_f32; v0 and
; v1 are merged (shift/or) into one dword and v2 is written as a short. In the
; GFX89 and GFX11 checks v1 (short) and v0 (dword) are stored directly with no
; conversion or repacking; the GFX11 checks group both stores under
; s_clause 0x1.
define void @void_func_v3f16(<3 x half> %arg0) #0 {
|
|
; CI-LABEL: void_func_v3f16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v1
|
|
; CI-NEXT: buffer_store_short v2, off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v3f16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x half> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores a <4 x half> function argument to an undef global (addrspace 1)
; pointer. In the CI checks v0..v3 each go through v_cvt_f16_f32 and are merged
; pairwise (shift/or) into v[0:1] before a single dwordx2 store. In the GFX89
; and GFX11 checks v[0:1] is stored directly with no conversion or repacking.
define void @void_func_v4f16(<4 x half> %arg0) #0 {
|
|
; CI-LABEL: void_func_v4f16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v4, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v3
|
|
; CI-NEXT: v_or_b32_e32 v1, v2, v1
|
|
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v4
|
|
; CI-NEXT: v_or_b32_e32 v0, v0, v2
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v4f16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x half> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Stores an <8 x half> function argument to an undef global (addrspace 1)
; pointer. In the CI checks v0..v7 each go through v_cvt_f16_f32 and are merged
; pairwise (shift/or) into v[2:5] before a single dwordx4 store. In the GFX89
; and GFX11 checks v[0:3] is stored directly with no conversion or repacking.
define void @void_func_v8f16(<8 x half> %arg0) #0 {
|
|
; CI-LABEL: void_func_v8f16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v8, v5
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v7
|
|
; CI-NEXT: v_or_b32_e32 v5, v6, v5
|
|
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v8
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v6
|
|
; CI-NEXT: v_or_b32_e32 v3, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v2, v0, v1
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx4 v[2:5], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v8f16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x half> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Passes a <16 x half> argument and stores it to an undef addrspace(1) pointer.
; The CI checks expect sixteen v_cvt_f16_f32 conversions with shift/or packing
; into v[2:5] and v[10:13], then two dwordx4 stores. The GFX89 and GFX11 checks
; expect two stores taken directly from v[4:7] and v[0:3]; the GFX11 checks
; additionally expect them to be grouped under s_clause 0x1.
define void @void_func_v16f16(<16 x half> %arg0) #0 {
|
|
; CI-LABEL: void_func_v16f16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v16, v5
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
|
|
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v7
|
|
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_or_b32_e32 v5, v6, v5
|
|
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v16
|
|
; CI-NEXT: v_or_b32_e32 v3, v2, v3
|
|
; CI-NEXT: v_or_b32_e32 v2, v0, v1
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v15
|
|
; CI-NEXT: v_or_b32_e32 v4, v4, v6
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v14
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v6, v13
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v7, v12
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_or_b32_e32 v13, v1, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v6
|
|
; CI-NEXT: v_or_b32_e32 v12, v7, v0
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v0, v11
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v1, v10
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v6, v9
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v7, v8
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_or_b32_e32 v11, v1, v0
|
|
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v6
|
|
; CI-NEXT: v_or_b32_e32 v10, v7, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx4 v[10:13], off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dwordx4 v[2:5], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v16f16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16f16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x half> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Make sure there is no alignment requirement for passed vgprs.
; The i64 argument sits between two i32 arguments; the checks expect the three
; volatile stores to read v0, the odd-starting pair v[1:2], and v3 in that
; order, i.e. no register is skipped to align the 64-bit value.
|
|
define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 {
|
|
; CIGFX89-LABEL: void_func_i32_i64_i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dword v3, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_i32_i64_i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b64 v[1:2], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b32 v3, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile i32 %arg0, ptr addrspace(1) undef
|
|
store volatile i64 %arg1, ptr addrspace(1) undef
|
|
store volatile i32 %arg2, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Passes a single-element { i32 } struct by value and stores it to an undef
; addrspace(1) pointer. Every check prefix expects one 32-bit store of v0.
define void @void_func_struct_i32({ i32 } %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_struct_i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_struct_i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store { i32 } %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Passes an { i8, i32 } struct by value and stores it to an undef addrspace(1)
; pointer. The checks expect a 32-bit store of v1 and a byte store of v0, so
; each struct field is expected in its own VGPR.
define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_struct_i8_i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_dword v1, off, s[4:7], 0
|
|
; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_struct_i8_i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b32 v1, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store { i8, i32 } %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Takes an { i8, i32 } struct through a byval addrspace(5) pointer, loads it,
; and stores it to an undef addrspace(1) pointer. The checks expect both fields
; to be loaded relative to s32 (the dword at offset 4, the byte at offset 0)
; before being stored.
define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_byval_struct_i8_i32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4
|
|
; CIGFX89-NEXT: buffer_load_ubyte v1, off, s[0:3], s32
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(1)
|
|
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(1)
|
|
; CIGFX89-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_byval_struct_i8_i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: scratch_load_b32 v0, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_u8 v1, off, s32
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%arg0.load = load { i8, i32 }, ptr addrspace(5) %arg0
|
|
store { i8, i32 } %arg0.load, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Takes two byval { i8, i32 } structs plus a trailing i32. Both structs are
; loaded and stored with volatile accesses, and %arg2 is stored (volatile) to an
; undef addrspace(3) pointer. The checks expect the struct fields at s32 offsets
; 0, 4, 8 and 12, and %arg2 to be written from v0 with ds_write_b32
; (ds_store_b32 in the GFX11 checks). The CI and VI checks also expect
; "s_mov_b32 m0, -1" before the DS write; the GFX9 and GFX11 checks do not.
define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }) %arg0, ptr addrspace(5) byval({ i8, i32 }) %arg1, i32 %arg2) #0 {
|
|
; CI-LABEL: void_func_byval_struct_i8_i32_x2:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_mov_b32 m0, -1
|
|
; CI-NEXT: buffer_store_dword v2, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dword v4, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v3, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: ds_write_b32 v0, v0
|
|
; CI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_byval_struct_i8_i32_x2:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: s_mov_b32 m0, -1
|
|
; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v3, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: ds_write_b32 v0, v0
|
|
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_byval_struct_i8_i32_x2:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_store_dword v2, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dword v4, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v3, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: ds_write_b32 v0, v0
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_byval_struct_i8_i32_x2:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: scratch_load_u8 v1, off, s32 glc dlc
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: scratch_load_b32 v2, off, s32 offset:4 glc dlc
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: scratch_load_u8 v3, off, s32 offset:8 glc dlc
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: scratch_load_b32 v4, off, s32 offset:12 glc dlc
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v2, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b32 v4, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v3, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: ds_store_b32 v0, v0
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%arg0.load = load volatile { i8, i32 }, ptr addrspace(5) %arg0
|
|
%arg1.load = load volatile { i8, i32 }, ptr addrspace(5) %arg1
|
|
store volatile { i8, i32 } %arg0.load, ptr addrspace(1) undef
|
|
store volatile { i8, i32 } %arg1.load, ptr addrspace(1) undef
|
|
store volatile i32 %arg2, ptr addrspace(3) undef
|
|
ret void
|
|
}
|
|
|
|
; Takes a byval i32 followed by a byval i64, loads both through their
; addrspace(5) pointers, and stores them to an undef addrspace(1) pointer.
; The checks expect the i32 at s32 offset 0 and the i64 starting at offset 8
; (two dword loads at offsets 8 and 12 for CIGFX89, one b64 load for GFX11).
define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, ptr addrspace(5) byval(i64) %arg1) #0 {
|
|
; CIGFX89-LABEL: void_func_byval_i32_byval_i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v2, off, s[0:3], s32
|
|
; CIGFX89-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8
|
|
; CIGFX89-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(2)
|
|
; CIGFX89-NEXT: buffer_store_dword v2, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(1)
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_byval_i32_byval_i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: scratch_load_b32 v2, off, s32
|
|
; GFX11-NEXT: scratch_load_b64 v[0:1], off, s32 offset:8
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b32 v2, off, s[0:3], 0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
%arg0.load = load i32, ptr addrspace(5) %arg0
|
|
%arg1.load = load i64, ptr addrspace(5) %arg1
|
|
store i32 %arg0.load, ptr addrspace(1) undef
|
|
store i64 %arg1.load, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Passes a <32 x i32> vector followed by an i32 and an i64, and stores each with
; a volatile store to an undef addrspace(1) pointer. The checks expect v31 (the
; last vector lane stored) to be loaded from s32 offset 0, the i32 from offset 4
; and the i64 from offsets 8 and 12; the remaining lanes are stored straight
; from v[0:30].
define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 {
|
|
; CIGFX89-LABEL: void_func_v32i32_i32_i64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CIGFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
|
|
; CIGFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
|
|
; CIGFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:4
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(3)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dword v34, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[32:33], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_i32_i64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x3
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:8
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b32 v34, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
|
|
store volatile i32 %arg1, ptr addrspace(1) undef
|
|
store volatile i64 %arg2, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; FIXME: Different ext load types on CI vs. VI
; Passes a <32 x i32> vector followed by i1, i8, i16, half and bfloat scalars
; (the half parameter %arg4 is not reflected in the function name) and stores
; each with a volatile store to an undef addrspace(1) pointer. The checks expect
; the scalars at s32 offsets 4, 8, 12, 16 and 20. The CI checks expect dword
; loads for the i8/i16/half/bfloat values, a v_cvt_f16_f32 for the half and a
; v_mul_f32 by 1.0 plus a 16-bit right shift for the bfloat; the GFX89 and GFX11
; checks expect 16-bit loads stored without conversion. All prefixes expect the
; i1 to be masked with "and 1" before its byte store.
|
|
define void @void_func_v32i32_i1_i8_i16_bf16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4, bfloat %arg5) #0 {
|
|
; CI-LABEL: void_func_v32i32_i1_i8_i16_bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:20
|
|
; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:16
|
|
; CI-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:8
|
|
; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:12
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: s_waitcnt vmcnt(5)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v12, 1.0, v32
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v13, v33
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: v_and_b32_e32 v0, 1, v34
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v12
|
|
; CI-NEXT: buffer_store_byte v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v35, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v36, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v13, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v1, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v32i32_i1_i8_i16_bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX89-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:4
|
|
; GFX89-NEXT: buffer_load_ushort v33, off, s[0:3], s32 offset:8
|
|
; GFX89-NEXT: buffer_load_ushort v34, off, s[0:3], s32 offset:12
|
|
; GFX89-NEXT: buffer_load_ushort v35, off, s[0:3], s32 offset:16
|
|
; GFX89-NEXT: buffer_load_ushort v36, off, s[0:3], s32 offset:20
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: v_and_b32_e32 v0, 1, v32
|
|
; GFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_byte v33, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_short v34, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_short v35, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_short v36, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_i1_i8_i16_bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x5
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_u8 v32, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_u16 v33, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_u16 v34, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_u16 v35, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_u16 v36, off, s32 offset:20
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-NEXT: v_and_b32_e32 v16, 1, v32
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v16, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: buffer_store_b16 v34, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b16 v35, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b16 v36, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
|
|
store volatile i1 %arg1, ptr addrspace(1) undef
|
|
store volatile i8 %arg2, ptr addrspace(1) undef
|
|
store volatile i16 %arg3, ptr addrspace(1) undef
|
|
store volatile half %arg4, ptr addrspace(1) undef
|
|
store volatile bfloat %arg5, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Passes a <32 x i32> vector followed by <2 x i32> and <2 x float> vectors and
; stores each with a volatile store to an undef addrspace(1) pointer. The checks
; expect v31 to be loaded from s32 offset 0 and the two 2-element vectors from
; offsets 4/8 and 12/16, then stored as 64-bit values from v[32:33] and
; v[34:35].
define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 {
|
|
; CIGFX89-LABEL: void_func_v32i32_v2i32_v2f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CIGFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
|
|
; CIGFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
|
|
; CIGFX89-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16
|
|
; CIGFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(4)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[32:33], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx2 v[34:35], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_v2i32_v2f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x4
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b64 v[34:35], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
|
|
store volatile <2 x i32> %arg1, ptr addrspace(1) undef
|
|
store volatile <2 x float> %arg2, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
define void @void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2, <2 x bfloat> %arg3, <4 x bfloat> %arg4) #0 {
|
|
; CI-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:28
|
|
; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:32
|
|
; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:36
|
|
; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:40
|
|
; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20
|
|
; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24
|
|
; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12
|
|
; CI-NEXT: s_waitcnt vmcnt(7)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:16
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:8
|
|
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v10, v38
|
|
; CI-NEXT: v_mul_f32_e32 v4, 1.0, v32
|
|
; CI-NEXT: v_mul_f32_e32 v5, 1.0, v33
|
|
; CI-NEXT: v_mul_f32_e32 v6, 1.0, v34
|
|
; CI-NEXT: v_mul_f32_e32 v7, 1.0, v35
|
|
; CI-NEXT: v_mul_f32_e32 v8, 1.0, v36
|
|
; CI-NEXT: v_mul_f32_e32 v9, 1.0, v37
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v16, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v17, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: v_cvt_f16_f32_e32 v11, v20
|
|
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v4
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v5
|
|
; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v6
|
|
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v7
|
|
; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v8
|
|
; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v9
|
|
; CI-NEXT: buffer_store_short v11, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v10, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v5, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v4, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v3, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v2, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v1, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:20
|
|
; GFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
|
|
; GFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:4
|
|
; GFX89-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:8
|
|
; GFX89-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:12
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dword v34, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dword v35, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dword v36, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: buffer_store_dwordx2 v[32:33], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x5
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:20
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:16
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-NEXT: buffer_store_b32 v34, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: buffer_store_b32 v35, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b32 v36, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
|
|
store volatile <2 x i16> %arg1, ptr addrspace(1) undef
|
|
store volatile <2 x half> %arg2, ptr addrspace(1) undef
|
|
store volatile <2 x bfloat> %arg3, ptr addrspace(1) undef
|
|
store volatile <4 x bfloat> %arg4, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Test: non-kernel function taking <32 x i32> followed by <2 x i64> and
; <2 x double>, each of which is written out with a volatile store.
;
; What the check lines below expect, for both the CIGFX89 and the GFX11 prefixes
; - v0..v30 are stored without being loaded first, while v31 (the last dword of
;   the first 128-bit store group of %arg0) is loaded from the stack at s32 + 0.
; - Eight further dwords are loaded from s32 offsets 4..32. The four dwords at
;   offsets 4..16 are stored right after %arg0 (matching the %arg1 store), and
;   the four dwords at offsets 20..32 are stored last (matching %arg2).
; - The two prefixes differ in which VGPR quad (v[32:35] or v[36:39]) receives
;   which stack range, and in the load/store mnemonics and wait instructions.
;
; NOTE(review): the check lines look autogenerated (presumably by
; update_llc_test_checks.py - confirm against the file header); regenerate them
; instead of editing by hand.
define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 {
|
|
; CIGFX89-LABEL: void_func_v32i32_v2i64_v2f64:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CIGFX89-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20
|
|
; CIGFX89-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16
|
|
; CIGFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12
|
|
; CIGFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
|
|
; CIGFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
|
|
; CIGFX89-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32
|
|
; CIGFX89-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28
|
|
; CIGFX89-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(8)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_v2i64_v2f64:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x8
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:32
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:28
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:24
|
|
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:20
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
; One volatile store per argument, in argument order, all to the same undef
; global (addrspace 1) pointer.
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
|
|
store volatile <2 x i64> %arg1, ptr addrspace(1) undef
|
|
store volatile <2 x double> %arg2, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Test: non-kernel function taking <32 x i32> followed by <4 x i32> and
; <4 x float>, each of which is written out with a volatile store.
;
; What the check lines below expect, for both the CIGFX89 and the GFX11 prefixes
; - v0..v30 are stored without being loaded first, while v31 is loaded from the
;   stack at s32 + 0 before the v[28:31] store.
; - v[32:35] is loaded from s32 offsets 4..16 and stored right after %arg0
;   (matching the %arg1 store); v[36:39] is loaded from offsets 20..32 and
;   stored last (matching %arg2).
; - Unlike some sibling tests, both prefixes use the same VGPR quads for the
;   same stack ranges here; only mnemonics, load order and waits differ.
;
; NOTE(review): the check lines look autogenerated (presumably by
; update_llc_test_checks.py - confirm against the file header); regenerate them
; instead of editing by hand.
define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 {
|
|
; CIGFX89-LABEL: void_func_v32i32_v4i32_v4f32:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CIGFX89-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20
|
|
; CIGFX89-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16
|
|
; CIGFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12
|
|
; CIGFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
|
|
; CIGFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
|
|
; CIGFX89-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32
|
|
; CIGFX89-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28
|
|
; CIGFX89-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(8)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_v4i32_v4f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x8
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:32
|
|
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:28
|
|
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:24
|
|
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:20
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
; One volatile store per argument, in argument order, all to the same undef
; global (addrspace 1) pointer.
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
|
|
store volatile <4 x i32> %arg1, ptr addrspace(1) undef
|
|
store volatile <4 x float> %arg2, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Test: non-kernel function taking <32 x i32> followed by <8 x i32> and
; <8 x float>, each of which is written out with a volatile store.
;
; What the check lines below expect
; - v0..v30 are stored without being loaded first, while v31 is loaded from the
;   stack at s32 + 0 before the v[28:31] store.
; - Sixteen further dwords are loaded from s32 offsets 4..64. After the %arg0
;   stores, the quads are stored in the order offsets 20..32, 4..16, 52..64,
;   36..48, i.e. high half before low half for each of the two trailing
;   arguments.
; - Under the CI, VI and GFX9 prefixes the stack loads are split in two
;   batches: v12..v19 are first stored as part of %arg0 and are then reloaded
;   from stack offsets 20..48. The GFX9 checks additionally expect an
;   `s_nop 0` between the second batch of loads and the following store.
; - Under the GFX11 prefix all seventeen stack loads are issued up front
;   (inside one s_clause) into v31..v39 and v48..v55, so no VGPR is reloaded.
;
; NOTE(review): the check lines look autogenerated (presumably by
; update_llc_test_checks.py - confirm against the file header); regenerate them
; instead of editing by hand.
define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 {
|
|
; CI-LABEL: void_func_v32i32_v8i32_v8f32:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64
|
|
; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60
|
|
; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56
|
|
; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52
|
|
; CI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16
|
|
; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12
|
|
; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:8
|
|
; CI-NEXT: s_waitcnt vmcnt(7)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
|
|
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
|
|
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
|
|
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
|
|
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48
|
|
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44
|
|
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40
|
|
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v32i32_v8i32_v8f32:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64
|
|
; VI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60
|
|
; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56
|
|
; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52
|
|
; VI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16
|
|
; VI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12
|
|
; VI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:8
|
|
; VI-NEXT: s_waitcnt vmcnt(7)
|
|
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:4
|
|
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
|
|
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
|
|
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
|
|
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
|
|
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48
|
|
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44
|
|
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40
|
|
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v32i32_v8i32_v8f32:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64
|
|
; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60
|
|
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56
|
|
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52
|
|
; GFX9-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16
|
|
; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12
|
|
; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:8
|
|
; GFX9-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:4
|
|
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
|
|
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
|
|
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
|
|
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
|
|
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48
|
|
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44
|
|
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40
|
|
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_v8i32_v8f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x10
|
|
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:48
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:44
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:40
|
|
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:64
|
|
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:60
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:56
|
|
; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:32
|
|
; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:28
|
|
; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:24
|
|
; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:20
|
|
; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:52
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:36
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(11)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-NEXT: buffer_store_b128 v[52:55], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: buffer_store_b128 v[48:51], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
; One volatile store per argument, in argument order, all to the same undef
; global (addrspace 1) pointer.
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
|
|
store volatile <8 x i32> %arg1, ptr addrspace(1) undef
|
|
store volatile <8 x float> %arg2, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 {
|
|
; CI-LABEL: void_func_v32i32_v16i32_v16f32:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64
|
|
; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60
|
|
; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56
|
|
; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:48
|
|
; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:44
|
|
; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:40
|
|
; CI-NEXT: s_waitcnt vmcnt(7)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:36
|
|
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
|
|
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
|
|
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
|
|
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
|
|
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
|
|
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
|
|
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96
|
|
; CI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92
|
|
; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88
|
|
; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84
|
|
; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112
|
|
; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108
|
|
; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:104
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:100
|
|
; CI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128
|
|
; CI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124
|
|
; CI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120
|
|
; CI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116
|
|
; CI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:80
|
|
; CI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:76
|
|
; CI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72
|
|
; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:68
|
|
; CI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v32i32_v16i32_v16f32:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; VI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64
|
|
; VI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60
|
|
; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56
|
|
; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:48
|
|
; VI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:44
|
|
; VI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:40
|
|
; VI-NEXT: s_waitcnt vmcnt(7)
|
|
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:36
|
|
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
|
|
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
|
|
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
|
|
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
|
|
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
|
|
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
|
|
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
|
|
; VI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96
|
|
; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92
|
|
; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88
|
|
; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84
|
|
; VI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112
|
|
; VI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108
|
|
; VI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:104
|
|
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:100
|
|
; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128
|
|
; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124
|
|
; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120
|
|
; VI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116
|
|
; VI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:80
|
|
; VI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:76
|
|
; VI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72
|
|
; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:68
|
|
; VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v32i32_v16i32_v16f32:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64
|
|
; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60
|
|
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56
|
|
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:48
|
|
; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:44
|
|
; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:40
|
|
; GFX9-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:36
|
|
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
|
|
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
|
|
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
|
|
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
|
|
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
|
|
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
|
|
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
|
|
; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96
|
|
; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92
|
|
; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88
|
|
; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84
|
|
; GFX9-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112
|
|
; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108
|
|
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:104
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:100
|
|
; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128
|
|
; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124
|
|
; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120
|
|
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116
|
|
; GFX9-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:80
|
|
; GFX9-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:76
|
|
; GFX9-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72
|
|
; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:68
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_v16i32_v16f32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x1f
|
|
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:80
|
|
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:76
|
|
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:72
|
|
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:96
|
|
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:92
|
|
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:88
|
|
; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:112
|
|
; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:108
|
|
; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:104
|
|
; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:128
|
|
; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:124
|
|
; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:120
|
|
; GFX11-NEXT: scratch_load_b32 v67, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_b32 v66, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_b32 v65, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_b32 v71, off, s32 offset:32
|
|
; GFX11-NEXT: scratch_load_b32 v70, off, s32 offset:28
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_b32 v69, off, s32 offset:24
|
|
; GFX11-NEXT: scratch_load_b32 v83, off, s32 offset:48
|
|
; GFX11-NEXT: scratch_load_b32 v82, off, s32 offset:44
|
|
; GFX11-NEXT: scratch_load_b32 v81, off, s32 offset:40
|
|
; GFX11-NEXT: scratch_load_b32 v87, off, s32 offset:64
|
|
; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:60
|
|
; GFX11-NEXT: scratch_load_b32 v85, off, s32 offset:56
|
|
; GFX11-NEXT: scratch_load_b32 v84, off, s32 offset:52
|
|
; GFX11-NEXT: scratch_load_b32 v80, off, s32 offset:36
|
|
; GFX11-NEXT: scratch_load_b32 v68, off, s32 offset:20
|
|
; GFX11-NEXT: scratch_load_b32 v64, off, s32 offset:4
|
|
; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:116
|
|
; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:100
|
|
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:84
|
|
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:68
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX11-NEXT: buffer_store_b128 v[84:87], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX11-NEXT: buffer_store_b128 v[80:83], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-NEXT: buffer_store_b128 v[68:71], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-NEXT: buffer_store_b128 v[64:67], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-NEXT: buffer_store_b128 v[52:55], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: buffer_store_b128 v[48:51], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
|
|
store volatile <16 x i32> %arg1, ptr addrspace(1) undef
|
|
store volatile <16 x float> %arg2, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Make sure v3 isn't a wasted register because of v3 types being promoted to v4.
; The <3 x float> argument is split into its three lanes, and each lane plus the
; trailing i32 argument is written with its own volatile addrspace(3) store.
; The expected code for every checked target stores v0, v1, v2 and then v3,
; i.e. %arg1 is read from the register right after the three vector lanes.
|
|
define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
|
|
; CI-LABEL: void_func_v3f32_wasted_reg:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: s_mov_b32 m0, -1
|
|
; CI-NEXT: ds_write_b32 v0, v0
|
|
; CI-NEXT: ds_write_b32 v0, v1
|
|
; CI-NEXT: ds_write_b32 v0, v2
|
|
; CI-NEXT: ds_write_b32 v0, v3
|
|
; CI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v3f32_wasted_reg:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: s_mov_b32 m0, -1
|
|
; VI-NEXT: ds_write_b32 v0, v0
|
|
; VI-NEXT: ds_write_b32 v0, v1
|
|
; VI-NEXT: ds_write_b32 v0, v2
|
|
; VI-NEXT: ds_write_b32 v0, v3
|
|
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v3f32_wasted_reg:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: ds_write_b32 v0, v0
|
|
; GFX9-NEXT: ds_write_b32 v0, v1
|
|
; GFX9-NEXT: ds_write_b32 v0, v2
|
|
; GFX9-NEXT: ds_write_b32 v0, v3
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3f32_wasted_reg:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: ds_store_b32 v0, v0
|
|
; GFX11-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-NEXT: ds_store_b32 v0, v2
|
|
; GFX11-NEXT: ds_store_b32 v0, v3
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
; Test input. The lanes are extracted and stored one at a time so that each
; incoming argument register appears as the data operand of its own store.
%arg0.0 = extractelement <3 x float> %arg0, i32 0
|
|
%arg0.1 = extractelement <3 x float> %arg0, i32 1
|
|
%arg0.2 = extractelement <3 x float> %arg0, i32 2
|
|
store volatile float %arg0.0, ptr addrspace(3) undef
|
|
store volatile float %arg0.1, ptr addrspace(3) undef
|
|
store volatile float %arg0.2, ptr addrspace(3) undef
|
|
store volatile i32 %arg1, ptr addrspace(3) undef
|
|
ret void
|
|
}
|
|
|
|
; Integer variant of the wasted-register test: a <3 x i32> argument followed by
; an i32 argument. Each vector lane and then %arg1 is written with a separate
; volatile addrspace(3) store. The expected code for every checked target uses
; v0, v1, v2 and v3 as the store data, so %arg1 sits directly after the lanes.
define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
|
|
; CI-LABEL: void_func_v3i32_wasted_reg:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: s_mov_b32 m0, -1
|
|
; CI-NEXT: ds_write_b32 v0, v0
|
|
; CI-NEXT: ds_write_b32 v0, v1
|
|
; CI-NEXT: ds_write_b32 v0, v2
|
|
; CI-NEXT: ds_write_b32 v0, v3
|
|
; CI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v3i32_wasted_reg:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: s_mov_b32 m0, -1
|
|
; VI-NEXT: ds_write_b32 v0, v0
|
|
; VI-NEXT: ds_write_b32 v0, v1
|
|
; VI-NEXT: ds_write_b32 v0, v2
|
|
; VI-NEXT: ds_write_b32 v0, v3
|
|
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v3i32_wasted_reg:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: ds_write_b32 v0, v0
|
|
; GFX9-NEXT: ds_write_b32 v0, v1
|
|
; GFX9-NEXT: ds_write_b32 v0, v2
|
|
; GFX9-NEXT: ds_write_b32 v0, v3
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3i32_wasted_reg:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: ds_store_b32 v0, v0
|
|
; GFX11-NEXT: ds_store_b32 v0, v1
|
|
; GFX11-NEXT: ds_store_b32 v0, v2
|
|
; GFX11-NEXT: ds_store_b32 v0, v3
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
; Test input. The lanes are extracted and stored one at a time so that each
; incoming argument register appears as the data operand of its own store.
%arg0.0 = extractelement <3 x i32> %arg0, i32 0
|
|
%arg0.1 = extractelement <3 x i32> %arg0, i32 1
|
|
%arg0.2 = extractelement <3 x i32> %arg0, i32 2
|
|
store volatile i32 %arg0.0, ptr addrspace(3) undef
|
|
store volatile i32 %arg0.1, ptr addrspace(3) undef
|
|
store volatile i32 %arg0.2, ptr addrspace(3) undef
|
|
store volatile i32 %arg1, ptr addrspace(3) undef
|
|
ret void
|
|
}
|
|
|
|
; Check there is no crash.
; The whole <16 x i8> argument is written with one volatile addrspace(1) store.
; The expected code emits sixteen separate byte stores, from v15 down to v0,
; each followed by a wait before the next store is issued.
|
|
define void @void_func_volatile_v16i8(<16 x i8> %arg0) #0 {
|
|
; CIGFX89-LABEL: void_func_volatile_v16i8:
|
|
; CIGFX89: ; %bb.0:
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; CIGFX89-NEXT: s_mov_b32 s6, -1
|
|
; CIGFX89-NEXT: buffer_store_byte v15, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v14, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v13, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v12, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v11, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v10, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v9, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v8, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v7, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v6, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v5, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v4, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v3, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v2, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v1, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
|
|
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_volatile_v16i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b8 v15, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v14, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v13, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v12, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v11, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v10, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v9, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v8, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v7, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v6, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v5, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v4, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v3, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v2, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <16 x i8> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Check there is no crash.
; A <32 x i32> argument is followed by a <16 x i8> argument, and both are
; written with volatile addrspace(1) stores. In the expected code v31 and
; sixteen further values are first loaded from s32-relative addresses (offsets
; 4 through 64) before being stored, so presumably those arguments are passed
; on the stack (to be confirmed against the calling convention).
; NOTE(review): the exact interleaving of loads, stores and waits below is
; scheduler output; regenerate the assertions rather than editing them by hand.
|
|
define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
|
|
; CI-LABEL: void_func_v32i32_v16i8:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:60
|
|
; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:64
|
|
; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:48
|
|
; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:52
|
|
; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:56
|
|
; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36
|
|
; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40
|
|
; CI-NEXT: s_waitcnt vmcnt(7)
|
|
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:28
|
|
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:32
|
|
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:20
|
|
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:24
|
|
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:16
|
|
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:12
|
|
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:8
|
|
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:4
|
|
; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:44
|
|
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v33, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v32, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v36, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v35, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v34, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v20, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v38, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v37, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v17, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v16, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v19, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v18, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v12, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v13, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v14, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: buffer_store_byte v15, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-LABEL: void_func_v32i32_v16i8:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; VI-NEXT: s_mov_b32 s7, 0xf000
|
|
; VI-NEXT: s_mov_b32 s6, -1
|
|
; VI-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:60
|
|
; VI-NEXT: buffer_load_ubyte v33, off, s[0:3], s32 offset:64
|
|
; VI-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:48
|
|
; VI-NEXT: buffer_load_ubyte v35, off, s[0:3], s32 offset:52
|
|
; VI-NEXT: buffer_load_ubyte v36, off, s[0:3], s32 offset:56
|
|
; VI-NEXT: buffer_load_ubyte v37, off, s[0:3], s32 offset:36
|
|
; VI-NEXT: buffer_load_ubyte v38, off, s[0:3], s32 offset:40
|
|
; VI-NEXT: s_waitcnt vmcnt(7)
|
|
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:28
|
|
; VI-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:32
|
|
; VI-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:20
|
|
; VI-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:24
|
|
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:16
|
|
; VI-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:12
|
|
; VI-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:8
|
|
; VI-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:4
|
|
; VI-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:44
|
|
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v33, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v32, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v36, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v35, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v34, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v20, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v38, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v37, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v17, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v16, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v19, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v18, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v12, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v13, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v14, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: buffer_store_byte v15, off, s[4:7], 0
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX9-LABEL: void_func_v32i32_v16i8:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
|
|
; GFX9-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s6, -1
|
|
; GFX9-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:60
|
|
; GFX9-NEXT: buffer_load_ubyte v33, off, s[0:3], s32 offset:64
|
|
; GFX9-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:48
|
|
; GFX9-NEXT: buffer_load_ubyte v35, off, s[0:3], s32 offset:52
|
|
; GFX9-NEXT: buffer_load_ubyte v36, off, s[0:3], s32 offset:56
|
|
; GFX9-NEXT: buffer_load_ubyte v37, off, s[0:3], s32 offset:36
|
|
; GFX9-NEXT: buffer_load_ubyte v38, off, s[0:3], s32 offset:40
|
|
; GFX9-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:28
|
|
; GFX9-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:32
|
|
; GFX9-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:20
|
|
; GFX9-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:24
|
|
; GFX9-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:44
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:16
|
|
; GFX9-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:12
|
|
; GFX9-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:8
|
|
; GFX9-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:4
|
|
; GFX9-NEXT: s_nop 0
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v33, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v32, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v36, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v35, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v34, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v20, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v38, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v37, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v17, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v16, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v19, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v18, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v12, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v13, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v14, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: buffer_store_byte v15, off, s[4:7], 0
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v32i32_v16i8:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_clause 0x10
|
|
; GFX11-NEXT: scratch_load_b32 v31, off, s32
|
|
; GFX11-NEXT: scratch_load_u8 v32, off, s32 offset:64
|
|
; GFX11-NEXT: scratch_load_u8 v33, off, s32 offset:60
|
|
; GFX11-NEXT: scratch_load_u8 v34, off, s32 offset:56
|
|
; GFX11-NEXT: scratch_load_u8 v35, off, s32 offset:52
|
|
; GFX11-NEXT: scratch_load_u8 v36, off, s32 offset:48
|
|
; GFX11-NEXT: scratch_load_u8 v37, off, s32 offset:44
|
|
; GFX11-NEXT: scratch_load_u8 v38, off, s32 offset:40
|
|
; GFX11-NEXT: scratch_load_u8 v39, off, s32 offset:36
|
|
; GFX11-NEXT: scratch_load_u8 v48, off, s32 offset:32
|
|
; GFX11-NEXT: scratch_load_u8 v49, off, s32 offset:28
|
|
; GFX11-NEXT: scratch_load_u8 v50, off, s32 offset:24
|
|
; GFX11-NEXT: scratch_load_u8 v51, off, s32 offset:20
|
|
; GFX11-NEXT: scratch_load_u8 v52, off, s32 offset:16
|
|
; GFX11-NEXT: scratch_load_u8 v53, off, s32 offset:12
|
|
; GFX11-NEXT: scratch_load_u8 v54, off, s32 offset:8
|
|
; GFX11-NEXT: scratch_load_u8 v55, off, s32 offset:4
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_waitcnt vmcnt(16)
|
|
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(15)
|
|
; GFX11-NEXT: buffer_store_b8 v32, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(14)
|
|
; GFX11-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(13)
|
|
; GFX11-NEXT: buffer_store_b8 v34, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(12)
|
|
; GFX11-NEXT: buffer_store_b8 v35, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(11)
|
|
; GFX11-NEXT: buffer_store_b8 v36, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(10)
|
|
; GFX11-NEXT: buffer_store_b8 v37, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(9)
|
|
; GFX11-NEXT: buffer_store_b8 v38, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(8)
|
|
; GFX11-NEXT: buffer_store_b8 v39, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(7)
|
|
; GFX11-NEXT: buffer_store_b8 v48, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(6)
|
|
; GFX11-NEXT: buffer_store_b8 v49, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(5)
|
|
; GFX11-NEXT: buffer_store_b8 v50, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(4)
|
|
; GFX11-NEXT: buffer_store_b8 v51, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(3)
|
|
; GFX11-NEXT: buffer_store_b8 v52, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(2)
|
|
; GFX11-NEXT: buffer_store_b8 v53, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(1)
|
|
; GFX11-NEXT: buffer_store_b8 v54, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: buffer_store_b8 v55, off, s[0:3], 0 dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
|
|
store volatile <16 x i8> %arg1, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
|
|
; Scalar bfloat argument written with a plain (non-volatile) addrspace(1) store.
; The expected code for the oldest checked target multiplies v0 by 1.0 and
; shifts it right by 16 before a 16-bit store, while the other targets store
; v0 directly as 16 bits. Presumably the former receives the value widened to
; f32 (to be confirmed against the calling convention).
define void @void_func_bf16(bfloat %arg0) #0 {
|
|
; CI-LABEL: void_func_bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store bfloat %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Two-element bfloat vector written with a plain (non-volatile) addrspace(1)
; store. The expected code for the oldest checked target takes the elements
; from v0 and v1, multiplies each by 1.0 and combines them with a 16-bit
; alignbit into v0 before a single dword store. The other targets store v0
; directly as one dword, so presumably the vector already arrives packed there
; (to be confirmed against the calling convention).
define void @void_func_v2bf16(<2 x bfloat> %arg0) #0 {
|
|
; CI-LABEL: void_func_v2bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_alignbit_b32 v0, v1, v0, 16
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v2bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v2bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <2 x bfloat> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; void_func_v3bf16 - takes a <3 x bfloat> argument and stores it through an
; undef global (addrspace(1)) pointer, then returns.
; Every prefix expects the store to be split in two - a 16-bit store of v1
; followed by a 32-bit store of v0.
;   CI     v0 and v1 are multiplied by 1.0 and combined into v0 with
;          v_alignbit_b32; v2 is multiplied by 1.0 and shifted right by 16
;          into v1; then buffer_store_short v1 and buffer_store_dword v0.
;   GFX89  buffer_store_short v1 and buffer_store_dword v0, no repacking.
;   GFX11  the two stores (b16 of v1, b32 of v0) follow an s_clause 0x1.
define void @void_func_v3bf16(<3 x bfloat> %arg0) #0 {
|
|
; CI-LABEL: void_func_v3bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_alignbit_b32 v0, v1, v0, 16
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v2
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_short v1, off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v3bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v3bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <3 x bfloat> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; void_func_v4bf16 - takes a <4 x bfloat> argument and stores it through an
; undef global (addrspace(1)) pointer, then returns.
; Expected output per check prefix, as listed below -
;   CI     v0..v3 are each multiplied by 1.0; the odd-numbered registers are
;          shifted right by 16 and combined pairwise with v_alignbit_b32 into
;          v[1:2], which is written with one buffer_store_dwordx2.
;   GFX89  v[0:1] is stored as-is with buffer_store_dwordx2.
;   GFX11  v[0:1] is stored as-is with buffer_store_b64.
; The interleaving of the CI multiply/shift instructions is whatever the
; scheduler produced; it is pinned by the -NEXT checks.
define void @void_func_v4bf16(<4 x bfloat> %arg0) #0 {
|
|
; CI-LABEL: void_func_v4bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_alignbit_b32 v2, v3, v2, 16
|
|
; CI-NEXT: v_alignbit_b32 v1, v1, v0, 16
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v4bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v4bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <4 x bfloat> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; void_func_v8bf16 - takes an <8 x bfloat> argument and stores it through an
; undef global (addrspace(1)) pointer, then returns.
; Expected output per check prefix, as listed below -
;   CI     v0..v7 are each multiplied by 1.0; the odd-numbered registers are
;          shifted right by 16 and combined pairwise with v_alignbit_b32 into
;          v[3:6], which is written with one buffer_store_dwordx4.
;   GFX89  v[0:3] is stored as-is with buffer_store_dwordx4.
;   GFX11  v[0:3] is stored as-is with buffer_store_b128.
; The interleaving of the CI multiply/shift instructions is whatever the
; scheduler produced; it is pinned by the -NEXT checks.
define void @void_func_v8bf16(<8 x bfloat> %arg0) #0 {
|
|
; CI-LABEL: void_func_v8bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v7, 1.0, v7
|
|
; CI-NEXT: v_mul_f32_e32 v5, 1.0, v5
|
|
; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
|
|
; CI-NEXT: v_mul_f32_e32 v6, 1.0, v6
|
|
; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
|
|
; CI-NEXT: v_mul_f32_e32 v4, 1.0, v4
|
|
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_alignbit_b32 v6, v7, v6, 16
|
|
; CI-NEXT: v_alignbit_b32 v5, v5, v4, 16
|
|
; CI-NEXT: v_alignbit_b32 v4, v3, v2, 16
|
|
; CI-NEXT: v_alignbit_b32 v3, v1, v0, 16
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v8bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v8bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <8 x bfloat> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; void_func_v16bf16 - takes a <16 x bfloat> argument and stores it through an
; undef global (addrspace(1)) pointer, then returns.
; Every prefix expects the store to be split into two 128-bit stores.
;   CI     v0..v15 are each multiplied by 1.0; the odd-numbered inputs are
;          shifted right by 16 and combined pairwise with v_alignbit_b32.
;          Inputs v0..v7 end up in v[3:6] and inputs v8..v15 in v[11:14];
;          v[11:14] is stored first, then v[3:6], both with
;          buffer_store_dwordx4.
;   GFX89  v[4:7] then v[0:3] are stored as-is with buffer_store_dwordx4.
;   GFX11  the two buffer_store_b128 (v[4:7], v[0:3]) follow an s_clause 0x1.
; The interleaving of the CI multiply/shift/alignbit instructions is whatever
; the scheduler produced; it is pinned by the -NEXT checks.
define void @void_func_v16bf16(<16 x bfloat> %arg0) #0 {
|
|
; CI-LABEL: void_func_v16bf16:
|
|
; CI: ; %bb.0:
|
|
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CI-NEXT: v_mul_f32_e32 v5, 1.0, v5
|
|
; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
|
|
; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
|
|
; CI-NEXT: v_mul_f32_e32 v4, 1.0, v4
|
|
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
|
|
; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2
|
|
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
; CI-NEXT: v_alignbit_b32 v5, v5, v4, 16
|
|
; CI-NEXT: v_alignbit_b32 v4, v3, v2, 16
|
|
; CI-NEXT: v_alignbit_b32 v3, v1, v0, 16
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v15
|
|
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v14
|
|
; CI-NEXT: v_alignbit_b32 v14, v0, v1, 16
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v13
|
|
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v12
|
|
; CI-NEXT: v_alignbit_b32 v13, v0, v1, 16
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v11
|
|
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v10
|
|
; CI-NEXT: v_alignbit_b32 v12, v0, v1, 16
|
|
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v9
|
|
; CI-NEXT: v_mul_f32_e32 v7, 1.0, v7
|
|
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
|
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v8
|
|
; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
|
|
; CI-NEXT: v_mul_f32_e32 v6, 1.0, v6
|
|
; CI-NEXT: v_alignbit_b32 v11, v0, v1, 16
|
|
; CI-NEXT: s_mov_b32 s7, 0xf000
|
|
; CI-NEXT: s_mov_b32 s6, -1
|
|
; CI-NEXT: v_alignbit_b32 v6, v7, v6, 16
|
|
; CI-NEXT: buffer_store_dwordx4 v[11:14], off, s[4:7], 0
|
|
; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0
|
|
; CI-NEXT: s_waitcnt vmcnt(0)
|
|
; CI-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX89-LABEL: void_func_v16bf16:
|
|
; GFX89: ; %bb.0:
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX89-NEXT: s_mov_b32 s7, 0xf000
|
|
; GFX89-NEXT: s_mov_b32 s6, -1
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
|
|
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
|
; GFX89-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX89-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: void_func_v16bf16:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s2, -1
|
|
; GFX11-NEXT: s_clause 0x1
|
|
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
|
|
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
store <16 x bfloat> %arg0, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; Attribute group #0 is referenced by the function definitions above; it only
; marks them nounwind.
attributes #0 = { nounwind }
|