In graphics shaders it is better overall to use NSA encoding for IMAGE instructions, because the benefit of less constrained register allocation outweighs the cost of larger encoding. In particular NSA form often avoids the need for extra V_MOV_B32 instructions between IMAGE instructions, which can allow the IMAGE instructions to be claused. Note that in GFX12 there is no longer a bit in the encoding to choose between NSA and non-NSA forms, so this only affects GFX10 and GFX11.
1672 lines
78 KiB
LLVM
1672 lines
78 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
|
|
|
|
; A16 1D sample. The single half coordinate %s is consumed directly as one
; address VGPR (v0) on every checked target; no packing instruction is expected.
define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
; GFX9-LABEL: sample_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 2D sample. The checks expect %s and %t to be packed into one VGPR with
; v_perm_b32 (selector 0x5040100) before the image_sample; GFX9 materializes
; the selector in an SGPR first, later targets use it as a literal.
define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; GFX9-LABEL: sample_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 3D sample. %s and %t are packed into one VGPR with v_perm_b32 and %r
; stays in its own VGPR. GFX9 is checked with a contiguous address range
; (v[1:2]); GFX10/GFX11/GFX12 are checked with the register-list form
; [v0, v2], so no extra copy is needed to make the operands adjacent.
define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
; GFX9-LABEL: sample_3d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v1, v0, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_3d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_3d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 cube sample. %s and %t are packed into one VGPR with v_perm_b32 and
; %face stays in its own VGPR. GFX9 is checked with contiguous v[1:2] plus the
; `da` modifier; GFX10 and later are checked with the register list [v0, v2]
; and dim:SQ_RSRC_IMG_CUBE.
define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
; GFX9-LABEL: sample_cube:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v1, v0, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cube:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_cube:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_cube:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE a16
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 1D-array sample. %s and %slice are packed into a single VGPR with
; v_perm_b32, so the sample takes one address register on every target.
; GFX9 is checked with the `da` modifier; GFX10 and later with
; dim:SQ_RSRC_IMG_1D_ARRAY.
define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
; GFX9-LABEL: sample_1darray:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1darray:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_1darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_1darray:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY a16
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 2D-array sample. %s and %t are packed into one VGPR with v_perm_b32 and
; %slice stays in its own VGPR. GFX9 is checked with contiguous v[1:2] plus
; `da`; GFX10 and later are checked with the register list [v0, v2] and
; dim:SQ_RSRC_IMG_2D_ARRAY.
define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
; GFX9-LABEL: sample_2darray:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v1, v0, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_2darray:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_2darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_2darray:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 1D depth-compare sample. The float %zcompare and the half %s each occupy
; their own VGPR (v0, v1); no packing instruction is expected. GFX9/10/11 are
; checked with the contiguous range v[0:1], GFX12 with the list [v0, v1].
define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
; GFX9-LABEL: sample_c_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_c v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 2D depth-compare sample. %zcompare stays in v0 while %s and %t are
; packed into v1 with v_perm_b32. GFX9/10/11 are checked with the contiguous
; range v[0:1], GFX12 with the list [v0, v1].
define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
; GFX9-LABEL: sample_c_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v2, v1, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_c v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 1D sample with LOD clamp. %s and %clamp are packed into a single VGPR
; with v_perm_b32, so image_sample_cl takes one address register (v0) on
; every checked target.
define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
; GFX9-LABEL: sample_cl_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_cl_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_cl_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 2D sample with LOD clamp. %s and %t are packed into one VGPR with
; v_perm_b32 and %clamp stays in its own VGPR. GFX9 is checked with the
; contiguous range v[1:2]; GFX10 and later with the register list [v0, v2].
define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v1, v0, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_cl v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_cl_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_cl v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_cl_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_cl v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 1D depth-compare sample with LOD clamp. %zcompare stays in v0 while %s
; and %clamp are packed into v1 with v_perm_b32. GFX9/10/11 are checked with
; the contiguous range v[0:1], GFX12 with the list [v0, v1].
define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
; GFX9-LABEL: sample_c_cl_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v2, v1, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_cl_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_cl_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_c_cl v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 2D depth-compare sample with LOD clamp. %s and %t are packed with
; v_perm_b32; %zcompare and %clamp each keep their own VGPR. GFX9 is checked
; with two extra v_mov_b32 copies that build the contiguous range v[3:5];
; GFX10 and later are checked with the register list [v0, v1, v3] and no
; copies.
define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_c_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mov_b32_e32 v3, v0
; GFX9-NEXT: v_perm_b32 v4, v2, v1, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_cl_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_cl_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 1D sample with LOD bias. The checks expect %bias and %s to stay in
; separate VGPRs (v0, v1) with no v_perm_b32 packing. GFX9/10/11 are checked
; with the contiguous range v[0:1], GFX12 with the list [v0, v1].
define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s) {
; GFX9-LABEL: sample_b_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_b_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_b_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_b_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_b v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 2D sample with LOD bias. %bias stays in v0 while %s and %t are packed
; into v1 with v_perm_b32. GFX9/10/11 are checked with the contiguous range
; v[0:1], GFX12 with the list [v0, v1].
define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) {
; GFX9-LABEL: sample_b_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v2, v1, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_b_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_b_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_b_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_b v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 1D depth-compare sample with LOD bias. %bias, %zcompare and %s each
; occupy their own VGPR (v0, v1, v2); no packing instruction is expected.
; GFX9/10/11 are checked with the contiguous range v[0:2], GFX12 with the
; list [v0, v1, v2].
define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s) {
; GFX9-LABEL: sample_c_b_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_b_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_b_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_c_b v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.c.b.2d with half %bias, %s, %t and a float %zcompare.
; Each check set expects one v_perm_b32 that folds v3 and v2 into v2
; (presumably %t and %s) before a sample addressed by v0..v2.
define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) {
; GFX9-LABEL: sample_c_b_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    s_mov_b32 s14, 0x5040100
; GFX9-NEXT:    v_perm_b32 v2, v3, v2, s14
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_b_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s12, exec_lo
; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_b_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s12, exec_lo
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT:    image_sample_c_b v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %result = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %result
}
|
|
|
|
; image.sample.b.cl.1d with half %bias, %s and %clamp.
; Each check set expects one v_perm_b32 writing v1 from v2 and v1, then a
; sample whose address operands are v0 and v1.
define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %clamp) {
; GFX9-LABEL: sample_b_cl_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    s_mov_b32 s14, 0x5040100
; GFX9-NEXT:    v_perm_b32 v1, v2, v1, s14
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_b_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_b_cl_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s12, exec_lo
; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT:    image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_b_cl_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s12, exec_lo
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT:    image_sample_b_cl v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %result = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %result
}
|
|
|
|
; image.sample.b.cl.2d with half %bias, %s, %t and %clamp.
; The GFX9 checks expect two v_mov_b32 plus one v_perm_b32 building the
; contiguous range v[3:5]; the GFX10, GFX11 and GFX12 checks expect only the
; v_perm_b32 and the bracketed address list [v0, v1, v3].
define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_b_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    s_mov_b32 s14, 0x5040100
; GFX9-NEXT:    v_mov_b32_e32 v5, v3
; GFX9-NEXT:    v_mov_b32_e32 v3, v0
; GFX9-NEXT:    v_perm_b32 v4, v2, v1, s14
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_b_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_b_cl_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s12, exec_lo
; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT:    image_sample_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_b_cl_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s12, exec_lo
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT:    image_sample_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %result = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %result
}
|
|
|
|
; image.sample.c.b.cl.1d with half %bias, %s, %clamp and a float %zcompare.
; Each check set expects one v_perm_b32 writing v2 from v3 and v2, then a
; sample whose address operands are v0..v2.
define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %clamp) {
; GFX9-LABEL: sample_c_b_cl_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    s_mov_b32 s14, 0x5040100
; GFX9-NEXT:    v_perm_b32 v2, v3, v2, s14
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_b_cl_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s12, exec_lo
; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT:    image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_b_cl_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s12, exec_lo
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT:    image_sample_c_b_cl v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %result = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %result
}
|
|
|
|
; image.sample.c.b.cl.2d with half %bias, %s, %t, %clamp and a float
; %zcompare. The GFX9 checks expect three v_mov_b32 plus one v_perm_b32
; building the contiguous range v[4:7]; the GFX10, GFX11 and GFX12 checks
; expect only the v_perm_b32 and the address list [v0, v1, v2, v4].
define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_c_b_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b64 s[12:13], exec
; GFX9-NEXT:    s_wqm_b64 exec, exec
; GFX9-NEXT:    s_mov_b32 s14, 0x5040100
; GFX9-NEXT:    v_mov_b32_e32 v7, v4
; GFX9-NEXT:    v_mov_b32_e32 v5, v1
; GFX9-NEXT:    v_mov_b32_e32 v4, v0
; GFX9-NEXT:    v_perm_b32 v6, v3, v2, s14
; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT:    image_sample_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s12, exec_lo
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT:    image_sample_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_b_cl_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s12, exec_lo
; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT:    image_sample_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_b_cl_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s12, exec_lo
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT:    image_sample_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %result = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %result
}
|
|
|
|
; image.sample.d.1d with half derivatives (%dsdh, %dsdv) and half %s.
; No exec-mask or packing instruction is expected ahead of the sample.
; The GFX9 checks name image_sample_d; the others name image_sample_d_g16.
define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
; GFX9-LABEL: sample_d_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_d_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_d_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_d_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_d_g16 v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %result = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %result
}
|
|
|
|
; image.sample.d.2d with six half operands (four derivatives, %s, %t).
; Every check set expects three v_perm_b32. The GFX9, GFX10 and GFX11 checks
; place the results in the contiguous range v[2:4]; the GFX12 checks keep
; them in v0, v2, v4 and pass the list [v0, v2, v4].
define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GFX9-LABEL: sample_d_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_perm_b32 v4, v5, v4, s12
; GFX9-NEXT:    v_perm_b32 v3, v3, v2, s12
; GFX9-NEXT:    v_perm_b32 v2, v1, v0, s12
; GFX9-NEXT:    image_sample_d v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_d_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
; GFX10-NEXT:    v_perm_b32 v3, v3, v2, 0x5040100
; GFX10-NEXT:    v_perm_b32 v2, v1, v0, 0x5040100
; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_d_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
; GFX11-NEXT:    v_perm_b32 v3, v3, v2, 0x5040100
; GFX11-NEXT:    v_perm_b32 v2, v1, v0, 0x5040100
; GFX11-NEXT:    image_sample_d_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_d_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX12-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %result = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %result
}
|
|
|
|
; image.sample.d.3d with nine half operands (six derivatives, %s, %t, %r).
; The GFX9 and GFX10 checks expect three v_mov_b32 and three v_perm_b32
; filling the contiguous range v[7:12]. The GFX11 checks expect only the
; three v_perm_b32 and the list [v0, v2, v3, v5, v[7:8]]. The GFX12 checks
; expect two v_mov_b32, three v_perm_b32 and the list [v0, v2, v3, v[8:10]].
define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
; GFX9-LABEL: sample_d_3d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_mov_b32_e32 v12, v8
; GFX9-NEXT:    v_mov_b32_e32 v10, v5
; GFX9-NEXT:    v_mov_b32_e32 v8, v2
; GFX9-NEXT:    v_perm_b32 v11, v7, v6, s12
; GFX9-NEXT:    v_perm_b32 v9, v4, v3, s12
; GFX9-NEXT:    v_perm_b32 v7, v1, v0, s12
; GFX9-NEXT:    image_sample_d v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_d_3d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v12, v8
; GFX10-NEXT:    v_mov_b32_e32 v10, v5
; GFX10-NEXT:    v_mov_b32_e32 v8, v2
; GFX10-NEXT:    v_perm_b32 v11, v7, v6, 0x5040100
; GFX10-NEXT:    v_perm_b32 v9, v4, v3, 0x5040100
; GFX10-NEXT:    v_perm_b32 v7, v1, v0, 0x5040100
; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_d_3d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT:    v_perm_b32 v7, v7, v6, 0x5040100
; GFX11-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v[7:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_d_3d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v10, v8
; GFX12-NEXT:    v_mov_b32_e32 v8, v5
; GFX12-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
; GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX12-NEXT:    v_perm_b32 v9, v7, v6, 0x5040100
; GFX12-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v3, v[8:10]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %result = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %result
}
|
|
|
|
; image.sample.c.d.1d with a float %zcompare and half %dsdh, %dsdv, %s.
; No preparatory instruction is expected; the sample reads its four
; address operands from v0..v3 in every check set.
define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
; GFX9-LABEL: sample_c_d_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %result = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %result
}
|
|
|
|
; image.sample.c.d.2d with a float %zcompare and six half operands.
; The GFX9 checks expect two v_mov_b32 and three v_perm_b32 so the
; addresses land in the contiguous range v[0:3]; the GFX10, GFX11 and GFX12
; checks expect three v_perm_b32 and the list [v0, v1, v3, v5].
define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GFX9-LABEL: sample_c_d_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v7, v3
; GFX9-NEXT:    v_mov_b32_e32 v8, v2
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_perm_b32 v3, v6, v5, s12
; GFX9-NEXT:    v_perm_b32 v2, v4, v7, s12
; GFX9-NEXT:    v_perm_b32 v1, v8, v1, s12
; GFX9-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v5, v6, v5, 0x5040100
; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_perm_b32 v5, v6, v5, 0x5040100
; GFX11-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
; GFX11-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_perm_b32 v5, v6, v5, 0x5040100
; GFX12-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
; GFX12-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX12-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %result = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %result
}
|
|
|
|
; image.sample.d.cl.1d with half %dsdh, %dsdv, %s and %clamp.
; Each check set expects one v_perm_b32 writing v2 from v3 and v2, then a
; sample whose address operands are v0..v2.
define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
; GFX9-LABEL: sample_d_cl_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_perm_b32 v2, v3, v2, s12
; GFX9-NEXT:    image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_d_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_d_cl_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_d_cl_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX12-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %result = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %result
}
|
|
|
|
; image.sample.d.cl.2d with seven half operands (four derivatives, %s, %t,
; %clamp). Every check set expects three v_perm_b32. The GFX9 checks place
; the results so the addresses form v[3:6]; the GFX10, GFX11 and GFX12
; checks pass the list [v0, v2, v4, v6].
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_d_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_perm_b32 v5, v5, v4, s12
; GFX9-NEXT:    v_perm_b32 v4, v3, v2, s12
; GFX9-NEXT:    v_perm_b32 v3, v1, v0, s12
; GFX9-NEXT:    image_sample_d_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_d_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_d_cl_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_d_cl_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX12-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %result = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %result
}
|
|
|
|
; image.sample.c.d.cl.1d with a float %zcompare and half %dsdh, %dsdv, %s,
; %clamp. Each check set expects one v_perm_b32 writing v3 from v4 and v3,
; then a sample whose address operands are v0..v3.
define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
; GFX9-LABEL: sample_c_d_cl_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_perm_b32 v3, v4, v3, s12
; GFX9-NEXT:    image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_cl_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
; GFX11-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_cl_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
; GFX12-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %result = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %result
}
|
|
|
|
; image.sample.c.d.cl.2d with a float %zcompare and seven half operands.
; The GFX9 checks expect two v_mov_b32 and three v_perm_b32 filling the
; contiguous range v[7:11]. The GFX10 and GFX11 checks expect three
; v_perm_b32 and the list [v0, v1, v3, v5, v7]. The GFX12 checks expect three
; v_perm_b32 and the list [v0, v1, v3, v[6:7]].
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_c_d_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_mov_b32_e32 v11, v7
; GFX9-NEXT:    v_mov_b32_e32 v7, v0
; GFX9-NEXT:    v_perm_b32 v10, v6, v5, s12
; GFX9-NEXT:    v_perm_b32 v9, v4, v3, s12
; GFX9-NEXT:    v_perm_b32 v8, v2, v1, s12
; GFX9-NEXT:    image_sample_c_d_cl v[0:3], v[7:11], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v5, v6, v5, 0x5040100
; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_cl_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_perm_b32 v5, v6, v5, 0x5040100
; GFX11-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
; GFX11-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_cl_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
; GFX12-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX12-NEXT:    v_perm_b32 v6, v6, v5, 0x5040100
; GFX12-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v[6:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %result = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %result
}
|
|
|
|
; image.sample.l.1d with half %s and %lod.
; Each check set expects one v_perm_b32 writing v0 from v1 and v0, then a
; sample that takes the single address register v0.
define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
; GFX9-LABEL: sample_l_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s12
; GFX9-NEXT:    image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_l_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT:    image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_l_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT:    image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_l_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX12-NEXT:    image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %result = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %result
}
|
|
|
|
; image.sample.l.2d with half %s, %t and %lod.
; The GFX9 checks expect the v_perm_b32 result in v1 so the addresses form
; v[1:2]; the GFX10, GFX11 and GFX12 checks keep it in v0 and pass [v0, v2].
define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
; GFX9-LABEL: sample_l_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_perm_b32 v1, v1, v0, s12
; GFX9-NEXT:    image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_l_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT:    image_sample_l v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_l_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT:    image_sample_l v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_l_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX12-NEXT:    image_sample_l v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %result = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %result
}
|
|
|
|
; image.sample.c.l.1d with a float %zcompare and half %s and %lod.
; Each check set expects one v_perm_b32 writing v1 from v2 and v1, then a
; sample whose address operands are v0 and v1.
define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
; GFX9-LABEL: sample_c_l_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_perm_b32 v1, v2, v1, s12
; GFX9-NEXT:    image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_l_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT:    image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_l_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT:    image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_l_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX12-NEXT:    image_sample_c_l v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %result = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %result
}
|
|
|
|
; A16 form of image.sample.c.l on a 2D image: f32 %zcompare, then half %s, %t
; and %lod. All targets are expected to combine v2 and v1 with a single
; v_perm_b32 (selector 0x5040100), leaving three address dwords.
;  * GFX9 has only the contiguous-range operand form here, so the checks expect
;    two extra v_mov_b32 to assemble v[3:5].
;  * GFX10, GFX11 and GFX12 use the register-list form [v0, v1, v3] and need no
;    moves.
define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
; GFX9-LABEL: sample_c_l_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_mov_b32_e32 v5, v3
; GFX9-NEXT:    v_mov_b32_e32 v3, v0
; GFX9-NEXT:    v_perm_b32 v4, v2, v1, s12
; GFX9-NEXT:    image_sample_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_l_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT:    image_sample_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_l_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT:    image_sample_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_l_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX12-NEXT:    image_sample_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 form of image.sample.lz on a 1D image with a single half coordinate %s.
; With only one address operand no v_perm_b32 is expected on any target: the
; sample reads v0 directly and is followed by the wait (s_waitcnt vmcnt(0) on
; GFX9-GFX11, s_wait_samplecnt 0x0 on GFX12).
define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
; GFX9-LABEL: sample_lz_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_lz_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_lz_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_lz_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 form of image.sample.lz on a 2D image with half coordinates %s and %t.
; Every target is expected to combine v1 and v0 into v0 with one v_perm_b32
; (selector 0x5040100; GFX9 first loads it into s12), after which the sample
; takes the single address register v0.
define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; GFX9-LABEL: sample_lz_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s12
; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_lz_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_lz_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_lz_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX12-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 form of image.sample.c.lz on a 1D image: f32 %zcompare plus one half
; coordinate %s. No v_perm_b32 is expected; the two address dwords are used
; where they already are. GFX9, GFX10 and GFX11 print them as the range v[0:1],
; GFX12 as the list [v0, v1].
define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
; GFX9-LABEL: sample_c_lz_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_lz_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_lz_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_lz_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_c_lz v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; A16 form of image.sample.c.lz on a 2D image: f32 %zcompare plus half %s and
; %t. Every target is expected to combine v2 and v1 into v1 with one
; v_perm_b32 (selector 0x5040100; via s12 on GFX9), giving two address dwords.
; GFX9, GFX10 and GFX11 print them as v[0:1]; GFX12 prints [v0, v1].
define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
; GFX9-LABEL: sample_c_lz_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_perm_b32 v1, v2, v1, s12
; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_lz_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_lz_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_lz_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX12-NEXT:    image_sample_c_lz v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.c.d.o on a 2D array with half derivatives and half coordinates,
; returning a single float (dmask 0x4). Operands are i32 %offset, f32
; %zcompare, four half derivatives, and half %s, %t, %slice. Each target is
; expected to emit three v_perm_b32 (selector 0x5040100) over the register
; pairs v3/v2, v5/v4 and v7/v6, which leaves six address dwords.
;  * GFX9 and GFX10 are expected to copy everything into the contiguous range
;    v[8:13] with three v_mov_b32. GFX9 uses the plain mnemonic with the `da`
;    modifier; GFX10 uses the _g16 mnemonic with dim:SQ_RSRC_IMG_2D_ARRAY.
;  * GFX11 uses a list whose last element is the range v[7:8]:
;    [v0, v1, v2, v4, v[7:8]], with no moves.
;  * GFX12 uses [v0, v1, v2, v[6:8]], also with no moves.
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
; GFX9-LABEL: sample_c_d_o_2darray_V1:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_mov_b32_e32 v13, v8
; GFX9-NEXT:    v_mov_b32_e32 v9, v1
; GFX9-NEXT:    v_mov_b32_e32 v8, v0
; GFX9-NEXT:    v_perm_b32 v12, v7, v6, s12
; GFX9-NEXT:    v_perm_b32 v11, v5, v4, s12
; GFX9-NEXT:    v_perm_b32 v10, v3, v2, s12
; GFX9-NEXT:    image_sample_c_d_o v0, v[8:13], s[0:7], s[8:11] dmask:0x4 a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_o_2darray_V1:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v13, v8
; GFX10-NEXT:    v_mov_b32_e32 v9, v1
; GFX10-NEXT:    v_mov_b32_e32 v8, v0
; GFX10-NEXT:    v_perm_b32 v12, v7, v6, 0x5040100
; GFX10-NEXT:    v_perm_b32 v11, v5, v4, 0x5040100
; GFX10-NEXT:    v_perm_b32 v10, v3, v2, 0x5040100
; GFX10-NEXT:    image_sample_c_d_o_g16 v0, v[8:13], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_o_2darray_V1:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT:    v_perm_b32 v7, v7, v6, 0x5040100
; GFX11-NEXT:    image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v[7:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_o_2darray_V1:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_perm_b32 v7, v7, v6, 0x5040100
; GFX12-NEXT:    v_perm_b32 v6, v5, v4, 0x5040100
; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX12-NEXT:    image_sample_c_d_o_g16 v0, [v0, v1, v2, v[6:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret float %v
}
|
|
|
|
; Two-component counterpart of sample_c_d_o_2darray_V1: same operand list, but
; the call passes dmask 6 and returns <2 x float>, so the expected destination
; is v[0:1] instead of v0. The address handling expected on each target is the
; same as for the one-component case: v[8:13] built with three v_mov_b32 on
; GFX9/GFX10, and register lists ending in v[7:8] (GFX11) or v[6:8] (GFX12).
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
; GFX9-LABEL: sample_c_d_o_2darray_V2:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_mov_b32_e32 v13, v8
; GFX9-NEXT:    v_mov_b32_e32 v9, v1
; GFX9-NEXT:    v_mov_b32_e32 v8, v0
; GFX9-NEXT:    v_perm_b32 v12, v7, v6, s12
; GFX9-NEXT:    v_perm_b32 v11, v5, v4, s12
; GFX9-NEXT:    v_perm_b32 v10, v3, v2, s12
; GFX9-NEXT:    image_sample_c_d_o v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_o_2darray_V2:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v13, v8
; GFX10-NEXT:    v_mov_b32_e32 v9, v1
; GFX10-NEXT:    v_mov_b32_e32 v8, v0
; GFX10-NEXT:    v_perm_b32 v12, v7, v6, 0x5040100
; GFX10-NEXT:    v_perm_b32 v11, v5, v4, 0x5040100
; GFX10-NEXT:    v_perm_b32 v10, v3, v2, 0x5040100
; GFX10-NEXT:    image_sample_c_d_o_g16 v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_o_2darray_V2:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT:    v_perm_b32 v7, v7, v6, 0x5040100
; GFX11-NEXT:    image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v[7:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_o_2darray_V2:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_perm_b32 v7, v7, v6, 0x5040100
; GFX12-NEXT:    v_perm_b32 v6, v5, v4, 0x5040100
; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX12-NEXT:    image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v[6:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <2 x float> %v
}
|
|
|
|
; Declarations of the llvm.amdgcn.image.sample.* intrinsics for this test.
; Every signature starts with an i32 (the dmask immediate at the call sites)
; and ends with <8 x i32>, <4 x i32>, i1, i32, i32; the operands in between are
; the per-variant sample arguments. All declarations carry attribute group #1.
;
; NOTE(review): the c.d.* names below end in `.f32.f16`, while the d.* names
; and the scalar c.d.o.2darray name end in `.f16.f16`, although all of them
; take half derivatives. The names are left exactly as found; confirm the
; intended mangling before renaming anything, since call sites must match.

; Basic sample, one declaration per image dimension.
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; c / cl / c.cl variants.
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; b variants (b, c.b, b.cl, c.b.cl).
declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32, half, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; d variants (d, c.d, d.cl, c.d.cl).
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; l variants (l, c.l).
declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; lz variants (lz, c.lz).
declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; c.d.o on 2D arrays, with scalar and two-component results.
declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Attribute groups. Only #1 is referenced by the declarations above.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }
|