Files
clang-p2996/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
Mirko Brkusanin 926746d22a [AMDGPU][GFX11] Legalize and select partial NSA MIMG instructions
If more registers are needed for VAddr then the NSA format allows then the
final register can act as a contigous set of remaining addresses. Update
legalizer to pack register for this new format and allow instruction
selection to use NSA encoding when number of addresses exceeds max size.
Also update SIShrinkInstructions to handle partial NSA.

Differential Revision: https://reviews.llvm.org/D144034
2023-02-23 13:33:34 +01:00

1330 lines
63 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
; GFX9-LABEL: sample_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; GFX9-LABEL: sample_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
; GFX9-LABEL: sample_3d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v1, v0, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_3d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
; GFX9-LABEL: sample_cube:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v1, v0, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cube:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_cube:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
; GFX9-LABEL: sample_1darray:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1darray:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_1darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
; GFX9-LABEL: sample_2darray:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v1, v0, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_2darray:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_2darray:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
; GFX9-LABEL: sample_c_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
; GFX9-LABEL: sample_c_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v2, v1, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
; GFX9-LABEL: sample_cl_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_cl_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v1, v0, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_cl_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
; GFX9-LABEL: sample_c_cl_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v2, v1, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_cl_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_c_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mov_b32_e32 v3, v0
; GFX9-NEXT: v_perm_b32 v4, v2, v1, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_cl_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s) {
; GFX9-LABEL: sample_b_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_b_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_b_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) {
; GFX9-LABEL: sample_b_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v2, v1, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_b_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_b_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s) {
; GFX9-LABEL: sample_c_b_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_b_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) {
; GFX9-LABEL: sample_c_b_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v2, v3, v2, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_b_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %clamp) {
; GFX9-LABEL: sample_b_cl_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v2, v1, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_b_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_b_cl_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_b_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mov_b32_e32 v3, v0
; GFX9-NEXT: v_perm_b32 v4, v2, v1, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_b_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_b_cl_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %clamp) {
; GFX9-LABEL: sample_c_b_cl_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_perm_b32 v2, v3, v2, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_b_cl_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_c_b_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[12:13], exec
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s14, 0x5040100
; GFX9-NEXT: v_mov_b32_e32 v7, v4
; GFX9-NEXT: v_mov_b32_e32 v5, v1
; GFX9-NEXT: v_mov_b32_e32 v4, v0
; GFX9-NEXT: v_perm_b32 v6, v3, v2, s14
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: image_sample_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_b_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_b_cl_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
; GFX9-LABEL: sample_d_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_d_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GFX9-LABEL: sample_d_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: v_perm_b32 v4, v5, v4, s12
; GFX9-NEXT: v_perm_b32 v3, v3, v2, s12
; GFX9-NEXT: v_perm_b32 v2, v1, v0, s12
; GFX9-NEXT: image_sample_d v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
; GFX10-NEXT: v_perm_b32 v3, v3, v2, 0x5040100
; GFX10-NEXT: v_perm_b32 v2, v1, v0, 0x5040100
; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_d_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
; GFX11-NEXT: v_perm_b32 v3, v3, v2, 0x5040100
; GFX11-NEXT: v_perm_b32 v2, v1, v0, 0x5040100
; GFX11-NEXT: image_sample_d_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
; GFX9-LABEL: sample_d_3d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: v_mov_b32_e32 v12, v8
; GFX9-NEXT: v_mov_b32_e32 v10, v5
; GFX9-NEXT: v_mov_b32_e32 v8, v2
; GFX9-NEXT: v_perm_b32 v11, v7, v6, s12
; GFX9-NEXT: v_perm_b32 v9, v4, v3, s12
; GFX9-NEXT: v_perm_b32 v7, v1, v0, s12
; GFX9-NEXT: image_sample_d v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_3d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v12, v8
; GFX10-NEXT: v_mov_b32_e32 v10, v5
; GFX10-NEXT: v_mov_b32_e32 v8, v2
; GFX10-NEXT: v_perm_b32 v11, v7, v6, 0x5040100
; GFX10-NEXT: v_perm_b32 v9, v4, v3, 0x5040100
; GFX10-NEXT: v_perm_b32 v7, v1, v0, 0x5040100
; GFX10-NEXT: image_sample_d_g16 v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_d_3d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: v_perm_b32 v7, v7, v6, 0x5040100
; GFX11-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v[7:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
; GFX9-LABEL: sample_c_d_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GFX9-LABEL: sample_c_d_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: v_mov_b32_e32 v7, v3
; GFX9-NEXT: v_mov_b32_e32 v8, v2
; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: v_perm_b32 v3, v6, v5, s12
; GFX9-NEXT: v_perm_b32 v2, v4, v7, s12
; GFX9-NEXT: v_perm_b32 v1, v8, v1, s12
; GFX9-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
; GFX11-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
; GFX9-LABEL: sample_d_cl_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: v_perm_b32 v2, v3, v2, s12
; GFX9-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_d_cl_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT: image_sample_d_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_d_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: v_perm_b32 v5, v5, v4, s12
; GFX9-NEXT: v_perm_b32 v4, v3, v2, s12
; GFX9-NEXT: v_perm_b32 v3, v1, v0, s12
; GFX9-NEXT: image_sample_d_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_d_cl_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
; GFX9-LABEL: sample_c_d_cl_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: v_perm_b32 v3, v4, v3, s12
; GFX9-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_cl_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
; GFX11-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_c_d_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: v_mov_b32_e32 v11, v7
; GFX9-NEXT: v_mov_b32_e32 v7, v0
; GFX9-NEXT: v_perm_b32 v10, v6, v5, s12
; GFX9-NEXT: v_perm_b32 v9, v4, v3, s12
; GFX9-NEXT: v_perm_b32 v8, v2, v1, s12
; GFX9-NEXT: image_sample_c_d_cl v[0:3], v[7:11], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_cl_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
; GFX11-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
; GFX9-LABEL: sample_l_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s12
; GFX9-NEXT: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_l_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_l_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
; GFX9-LABEL: sample_l_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v1, v0, s12
; GFX9-NEXT: image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_l_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
; GFX10-NEXT: image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_l_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
; GFX11-NEXT: image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
; GFX9-LABEL: sample_c_l_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v2, v1, s12
; GFX9-NEXT: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_l_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_l_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
; GFX9-LABEL: sample_c_l_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mov_b32_e32 v3, v0
; GFX9-NEXT: v_perm_b32 v4, v2, v1, s12
; GFX9-NEXT: image_sample_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_l_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: image_sample_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_l_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT: image_sample_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
; GFX9-LABEL: sample_lz_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_lz_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_lz_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; GFX9-LABEL: sample_lz_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s12
; GFX9-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_lz_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_lz_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
; GFX9-LABEL: sample_c_lz_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_lz_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_lz_1d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
; GFX9-LABEL: sample_c_lz_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: v_perm_b32 v1, v2, v1, s12
; GFX9-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_lz_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_lz_2d:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX11-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
; GFX9-LABEL: sample_c_d_o_2darray_V1:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: v_mov_b32_e32 v13, v8
; GFX9-NEXT: v_mov_b32_e32 v9, v1
; GFX9-NEXT: v_mov_b32_e32 v8, v0
; GFX9-NEXT: v_perm_b32 v12, v7, v6, s12
; GFX9-NEXT: v_perm_b32 v11, v5, v4, s12
; GFX9-NEXT: v_perm_b32 v10, v3, v2, s12
; GFX9-NEXT: image_sample_c_d_o v0, v[8:13], s[0:7], s[8:11] dmask:0x4 a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_o_2darray_V1:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v13, v8
; GFX10-NEXT: v_mov_b32_e32 v9, v1
; GFX10-NEXT: v_mov_b32_e32 v8, v0
; GFX10-NEXT: v_perm_b32 v12, v7, v6, 0x5040100
; GFX10-NEXT: v_perm_b32 v11, v5, v4, 0x5040100
; GFX10-NEXT: v_perm_b32 v10, v3, v2, 0x5040100
; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[8:13], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_o_2darray_V1:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT: v_perm_b32 v7, v7, v6, 0x5040100
; GFX11-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v[7:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret float %v
}
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
; GFX9-LABEL: sample_c_d_o_2darray_V2:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: v_mov_b32_e32 v13, v8
; GFX9-NEXT: v_mov_b32_e32 v9, v1
; GFX9-NEXT: v_mov_b32_e32 v8, v0
; GFX9-NEXT: v_perm_b32 v12, v7, v6, s12
; GFX9-NEXT: v_perm_b32 v11, v5, v4, s12
; GFX9-NEXT: v_perm_b32 v10, v3, v2, s12
; GFX9-NEXT: image_sample_c_d_o v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_o_2darray_V2:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v13, v8
; GFX10-NEXT: v_mov_b32_e32 v9, v1
; GFX10-NEXT: v_mov_b32_e32 v8, v0
; GFX10-NEXT: v_perm_b32 v12, v7, v6, 0x5040100
; GFX10-NEXT: v_perm_b32 v11, v5, v4, 0x5040100
; GFX10-NEXT: v_perm_b32 v10, v3, v2, 0x5040100
; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_o_2darray_V2:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT: v_perm_b32 v7, v7, v6, 0x5040100
; GFX11-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v[7:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <2 x float> %v
}
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32, half, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }