This removes the need to explicitly set isTruncStore on truncstorei8 and
other similar PatFrags that include truncstore in their frags DAG.
This allows some new patterns to be imported for AMDGPU as you can see
in the changed test.
The extra isTruncStore settings were added in ae2b36e8bd, along with some
other TableGen changes that look for MemoryVT together with isTruncStore. I
did not remove that code, because I'm not sure whether any out-of-tree users
have become dependent on it. It is no longer exercised in-tree.
275 lines
14 KiB
LLVM
275 lines
14 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10-UNPACKED %s
|
|
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10-PACKED %s
|
|
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10-PACKED %s
|
|
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GFX10-PACKED %s
|
|
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX11-PACKED,GFX11-PACKED-TRUE16 %s
|
|
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX11-PACKED,GFX11-PACKED-FAKE16 %s
|
|
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-SDAG,GFX12-PACKED-SDAG-TRUE16 %s
|
|
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-SDAG,GFX12-PACKED-SDAG-FAKE16 %s
|
|
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-GISEL,GFX12-PACKED-GISEL-TRUE16 %s
|
|
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-GISEL,GFX12-PACKED-GISEL-FAKE16 %s
|
|
|
|
; Stores a single half, passed as a kernel argument, through
; llvm.amdgcn.raw.tbuffer.store.f16. Every run expects the value to be loaded
; into s6, copied into v0, and written by one d16 "x" tbuffer store. The
; mnemonic spelling, the soffset operand (0 vs. null) and the printed format
; operand differ between targets even though the IR immediate (33) is the same.
define amdgpu_kernel void @tbuffer_store_d16_x(<4 x i32> %rsrc, half %data) {
|
|
; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_x:
|
|
; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
|
|
; PREGFX10-UNPACKED-NEXT: s_load_dword s6, s[4:5], 0x34
|
|
; PREGFX10-UNPACKED-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; PREGFX10-UNPACKED-NEXT: s_waitcnt lgkmcnt(0)
|
|
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; PREGFX10-UNPACKED-NEXT: tbuffer_store_format_d16_x v0, off, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED]
|
|
; PREGFX10-UNPACKED-NEXT: s_endpgm
|
|
;
|
|
; PREGFX10-PACKED-LABEL: tbuffer_store_d16_x:
|
|
; PREGFX10-PACKED: ; %bb.0: ; %main_body
|
|
; PREGFX10-PACKED-NEXT: s_load_dword s6, s[4:5], 0x34
|
|
; PREGFX10-PACKED-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; PREGFX10-PACKED-NEXT: s_waitcnt lgkmcnt(0)
|
|
; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; PREGFX10-PACKED-NEXT: tbuffer_store_format_d16_x v0, off, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED]
|
|
; PREGFX10-PACKED-NEXT: s_endpgm
|
|
;
|
|
; GFX10-PACKED-LABEL: tbuffer_store_d16_x:
|
|
; GFX10-PACKED: ; %bb.0: ; %main_body
|
|
; GFX10-PACKED-NEXT: s_clause 0x1
|
|
; GFX10-PACKED-NEXT: s_load_dword s6, s[4:5], 0x34
|
|
; GFX10-PACKED-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; GFX10-PACKED-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-PACKED-NEXT: tbuffer_store_format_d16_x v0, off, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED]
|
|
; GFX10-PACKED-NEXT: s_endpgm
|
|
;
|
|
; GFX11-PACKED-LABEL: tbuffer_store_d16_x:
|
|
; GFX11-PACKED: ; %bb.0: ; %main_body
|
|
; GFX11-PACKED-NEXT: s_clause 0x1
|
|
; GFX11-PACKED-NEXT: s_load_b32 s6, s[4:5], 0x34
|
|
; GFX11-PACKED-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GFX11-PACKED-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_x v0, off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
|
|
; GFX11-PACKED-NEXT: s_endpgm
|
|
;
|
|
; GFX12-PACKED-LABEL: tbuffer_store_d16_x:
|
|
; GFX12-PACKED: ; %bb.0: ; %main_body
|
|
; GFX12-PACKED-NEXT: s_clause 0x1
|
|
; GFX12-PACKED-NEXT: s_load_b32 s6, s[4:5], 0x34
|
|
; GFX12-PACKED-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GFX12-PACKED-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX12-PACKED-NEXT: tbuffer_store_d16_format_x v0, off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
|
|
; GFX12-PACKED-NEXT: s_endpgm
|
|
main_body:
|
|
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %data, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
|
|
ret void
|
|
}
|
|
|
|
; Stores a <2 x half> kernel argument through
; llvm.amdgcn.raw.tbuffer.store.v2f16. The tonga run expects the two halves to
; be split out of s6 with s_lshr_b32 / s_and_b32 and stored from the register
; pair v[0:1]; every other run expects the 32-bit argument to be copied
; unchanged into v0 and stored from that single register.
define amdgpu_kernel void @tbuffer_store_d16_xy(<4 x i32> %rsrc, <2 x half> %data) {
|
|
; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xy:
|
|
; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
|
|
; PREGFX10-UNPACKED-NEXT: s_load_dword s6, s[4:5], 0x34
|
|
; PREGFX10-UNPACKED-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; PREGFX10-UNPACKED-NEXT: s_waitcnt lgkmcnt(0)
|
|
; PREGFX10-UNPACKED-NEXT: s_lshr_b32 s4, s6, 16
|
|
; PREGFX10-UNPACKED-NEXT: s_and_b32 s5, s6, 0xffff
|
|
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, s5
|
|
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v1, s4
|
|
; PREGFX10-UNPACKED-NEXT: tbuffer_store_format_d16_xy v[0:1], off, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED]
|
|
; PREGFX10-UNPACKED-NEXT: s_endpgm
|
|
;
|
|
; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xy:
|
|
; PREGFX10-PACKED: ; %bb.0: ; %main_body
|
|
; PREGFX10-PACKED-NEXT: s_load_dword s6, s[4:5], 0x34
|
|
; PREGFX10-PACKED-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; PREGFX10-PACKED-NEXT: s_waitcnt lgkmcnt(0)
|
|
; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; PREGFX10-PACKED-NEXT: tbuffer_store_format_d16_xy v0, off, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED]
|
|
; PREGFX10-PACKED-NEXT: s_endpgm
|
|
;
|
|
; GFX10-PACKED-LABEL: tbuffer_store_d16_xy:
|
|
; GFX10-PACKED: ; %bb.0: ; %main_body
|
|
; GFX10-PACKED-NEXT: s_clause 0x1
|
|
; GFX10-PACKED-NEXT: s_load_dword s6, s[4:5], 0x34
|
|
; GFX10-PACKED-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; GFX10-PACKED-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-PACKED-NEXT: tbuffer_store_format_d16_xy v0, off, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED]
|
|
; GFX10-PACKED-NEXT: s_endpgm
|
|
;
|
|
; GFX11-PACKED-LABEL: tbuffer_store_d16_xy:
|
|
; GFX11-PACKED: ; %bb.0: ; %main_body
|
|
; GFX11-PACKED-NEXT: s_clause 0x1
|
|
; GFX11-PACKED-NEXT: s_load_b32 s6, s[4:5], 0x34
|
|
; GFX11-PACKED-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GFX11-PACKED-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xy v0, off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
|
|
; GFX11-PACKED-NEXT: s_endpgm
|
|
;
|
|
; GFX12-PACKED-LABEL: tbuffer_store_d16_xy:
|
|
; GFX12-PACKED: ; %bb.0: ; %main_body
|
|
; GFX12-PACKED-NEXT: s_clause 0x1
|
|
; GFX12-PACKED-NEXT: s_load_b32 s6, s[4:5], 0x34
|
|
; GFX12-PACKED-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GFX12-PACKED-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX12-PACKED-NEXT: tbuffer_store_d16_format_xy v0, off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
|
|
; GFX12-PACKED-NEXT: s_endpgm
|
|
main_body:
|
|
call void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
|
|
ret void
|
|
}
|
|
|
|
; Stores the first three elements of a <4 x half> kernel argument (selected
; with a shufflevector) through llvm.amdgcn.raw.tbuffer.store.v3f16.
; The tonga run expects one half per register in v[0:2]. The other runs expect
; the register pair v[0:1]: v0 receives s6 unchanged, and v1 receives s7 masked
; with 0xffff. The exception is the gfx1200 -global-isel runs, which copy s7
; into v1 without the mask; that is why gfx1200 has separate SelectionDAG and
; GlobalISel check blocks for this function only.
define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %data) {
|
|
; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xyz:
|
|
; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
|
|
; PREGFX10-UNPACKED-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
|
|
; PREGFX10-UNPACKED-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; PREGFX10-UNPACKED-NEXT: s_waitcnt lgkmcnt(0)
|
|
; PREGFX10-UNPACKED-NEXT: s_and_b32 s4, s7, 0xffff
|
|
; PREGFX10-UNPACKED-NEXT: s_lshr_b32 s5, s6, 16
|
|
; PREGFX10-UNPACKED-NEXT: s_and_b32 s6, s6, 0xffff
|
|
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v1, s5
|
|
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v2, s4
|
|
; PREGFX10-UNPACKED-NEXT: tbuffer_store_format_d16_xyz v[0:2], off, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED]
|
|
; PREGFX10-UNPACKED-NEXT: s_endpgm
|
|
;
|
|
; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xyz:
|
|
; PREGFX10-PACKED: ; %bb.0: ; %main_body
|
|
; PREGFX10-PACKED-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
|
|
; PREGFX10-PACKED-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; PREGFX10-PACKED-NEXT: s_waitcnt lgkmcnt(0)
|
|
; PREGFX10-PACKED-NEXT: s_and_b32 s4, s7, 0xffff
|
|
; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v1, s4
|
|
; PREGFX10-PACKED-NEXT: tbuffer_store_format_d16_xyz v[0:1], off, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED]
|
|
; PREGFX10-PACKED-NEXT: s_endpgm
|
|
;
|
|
; GFX10-PACKED-LABEL: tbuffer_store_d16_xyz:
|
|
; GFX10-PACKED: ; %bb.0: ; %main_body
|
|
; GFX10-PACKED-NEXT: s_clause 0x1
|
|
; GFX10-PACKED-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
|
|
; GFX10-PACKED-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; GFX10-PACKED-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-PACKED-NEXT: s_and_b32 s4, s7, 0xffff
|
|
; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-PACKED-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX10-PACKED-NEXT: tbuffer_store_format_d16_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED]
|
|
; GFX10-PACKED-NEXT: s_endpgm
|
|
;
|
|
; GFX11-PACKED-LABEL: tbuffer_store_d16_xyz:
|
|
; GFX11-PACKED: ; %bb.0: ; %main_body
|
|
; GFX11-PACKED-NEXT: s_clause 0x1
|
|
; GFX11-PACKED-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
|
|
; GFX11-PACKED-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GFX11-PACKED-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-PACKED-NEXT: s_and_b32 s4, s7, 0xffff
|
|
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX11-PACKED-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
|
|
; GFX11-PACKED-NEXT: s_endpgm
|
|
;
|
|
; GFX12-PACKED-SDAG-LABEL: tbuffer_store_d16_xyz:
|
|
; GFX12-PACKED-SDAG: ; %bb.0: ; %main_body
|
|
; GFX12-PACKED-SDAG-NEXT: s_clause 0x1
|
|
; GFX12-PACKED-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
|
|
; GFX12-PACKED-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GFX12-PACKED-SDAG-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-PACKED-SDAG-NEXT: s_and_b32 s4, s7, 0xffff
|
|
; GFX12-PACKED-SDAG-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX12-PACKED-SDAG-NEXT: v_mov_b32_e32 v1, s4
|
|
; GFX12-PACKED-SDAG-NEXT: tbuffer_store_d16_format_xyz v[0:1], off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
|
|
; GFX12-PACKED-SDAG-NEXT: s_endpgm
|
|
;
|
|
; GFX12-PACKED-GISEL-LABEL: tbuffer_store_d16_xyz:
|
|
; GFX12-PACKED-GISEL: ; %bb.0: ; %main_body
|
|
; GFX12-PACKED-GISEL-NEXT: s_clause 0x1
|
|
; GFX12-PACKED-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
|
|
; GFX12-PACKED-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GFX12-PACKED-GISEL-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-PACKED-GISEL-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX12-PACKED-GISEL-NEXT: v_mov_b32_e32 v1, s7
|
|
; GFX12-PACKED-GISEL-NEXT: tbuffer_store_d16_format_xyz v[0:1], off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
|
|
; GFX12-PACKED-GISEL-NEXT: s_endpgm
|
|
main_body:
|
|
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
|
|
call void @llvm.amdgcn.raw.tbuffer.store.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
|
|
ret void
|
|
}
|
|
|
|
; Stores a full <4 x half> kernel argument through
; llvm.amdgcn.raw.tbuffer.store.v4f16. The tonga run expects each half to be
; separated with s_lshr_b32 / s_and_b32 and stored from four registers
; v[0:3]; every other run expects s[6:7] to be copied unchanged into v[0:1]
; and stored from that register pair.
define amdgpu_kernel void @tbuffer_store_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data) {
|
|
; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xyzw:
|
|
; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
|
|
; PREGFX10-UNPACKED-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
|
|
; PREGFX10-UNPACKED-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; PREGFX10-UNPACKED-NEXT: s_waitcnt lgkmcnt(0)
|
|
; PREGFX10-UNPACKED-NEXT: s_lshr_b32 s4, s7, 16
|
|
; PREGFX10-UNPACKED-NEXT: s_and_b32 s5, s7, 0xffff
|
|
; PREGFX10-UNPACKED-NEXT: s_lshr_b32 s7, s6, 16
|
|
; PREGFX10-UNPACKED-NEXT: s_and_b32 s6, s6, 0xffff
|
|
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v1, s7
|
|
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v2, s5
|
|
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v3, s4
|
|
; PREGFX10-UNPACKED-NEXT: tbuffer_store_format_d16_xyzw v[0:3], off, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED]
|
|
; PREGFX10-UNPACKED-NEXT: s_endpgm
|
|
;
|
|
; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xyzw:
|
|
; PREGFX10-PACKED: ; %bb.0: ; %main_body
|
|
; PREGFX10-PACKED-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
|
|
; PREGFX10-PACKED-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; PREGFX10-PACKED-NEXT: s_waitcnt lgkmcnt(0)
|
|
; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v1, s7
|
|
; PREGFX10-PACKED-NEXT: tbuffer_store_format_d16_xyzw v[0:1], off, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED]
|
|
; PREGFX10-PACKED-NEXT: s_endpgm
|
|
;
|
|
; GFX10-PACKED-LABEL: tbuffer_store_d16_xyzw:
|
|
; GFX10-PACKED: ; %bb.0: ; %main_body
|
|
; GFX10-PACKED-NEXT: s_clause 0x1
|
|
; GFX10-PACKED-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
|
|
; GFX10-PACKED-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; GFX10-PACKED-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX10-PACKED-NEXT: v_mov_b32_e32 v1, s7
|
|
; GFX10-PACKED-NEXT: tbuffer_store_format_d16_xyzw v[0:1], off, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED]
|
|
; GFX10-PACKED-NEXT: s_endpgm
|
|
;
|
|
; GFX11-PACKED-LABEL: tbuffer_store_d16_xyzw:
|
|
; GFX11-PACKED: ; %bb.0: ; %main_body
|
|
; GFX11-PACKED-NEXT: s_clause 0x1
|
|
; GFX11-PACKED-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
|
|
; GFX11-PACKED-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GFX11-PACKED-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX11-PACKED-NEXT: v_mov_b32_e32 v1, s7
|
|
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyzw v[0:1], off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
|
|
; GFX11-PACKED-NEXT: s_endpgm
|
|
;
|
|
; GFX12-PACKED-LABEL: tbuffer_store_d16_xyzw:
|
|
; GFX12-PACKED: ; %bb.0: ; %main_body
|
|
; GFX12-PACKED-NEXT: s_clause 0x1
|
|
; GFX12-PACKED-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
|
|
; GFX12-PACKED-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GFX12-PACKED-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, s6
|
|
; GFX12-PACKED-NEXT: v_mov_b32_e32 v1, s7
|
|
; GFX12-PACKED-NEXT: tbuffer_store_d16_format_xyzw v[0:1], off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
|
|
; GFX12-PACKED-NEXT: s_endpgm
|
|
main_body:
|
|
call void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
|
|
ret void
|
|
}
|
|
|
|
; Declarations of the raw tbuffer store intrinsic overloads called by the
; kernels in this file, one per stored data type (half, <2 x half>,
; <3 x half>, <4 x half>). Each takes the data, a <4 x i32> value, and four
; i32 operands.
; NOTE(review): the four i32 operands are presumably offset, soffset, format
; and auxiliary/cache-policy, in that order; confirm against IntrinsicsAMDGPU.td.
declare void @llvm.amdgcn.raw.tbuffer.store.f16(half, <4 x i32>, i32, i32, i32, i32)
|
|
declare void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32)
|
|
declare void @llvm.amdgcn.raw.tbuffer.store.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32)
|
|
declare void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32)
|
|
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
|
; GFX11-PACKED-FAKE16: {{.*}}
|
|
; GFX11-PACKED-TRUE16: {{.*}}
|
|
; GFX12-PACKED-GISEL-FAKE16: {{.*}}
|
|
; GFX12-PACKED-GISEL-TRUE16: {{.*}}
|
|
; GFX12-PACKED-SDAG-FAKE16: {{.*}}
|
|
; GFX12-PACKED-SDAG-TRUE16: {{.*}}
|