SIInsertWaitcnts inserts waitcnt instructions to resolve data dependencies. The GFX10+ vscnt (VMEM store count) counter is never used in this way. It is only used to resolve memory dependencies, and that is handled by SIMemoryLegalizer. Hence there is no need to conservatively wait for vscnt to be 0 on function entry and before returns. Differential Revision: https://reviews.llvm.org/D153537
2219 lines
104 KiB
LLVM
2219 lines
104 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s
|
|
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
|
|
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
|
|
; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
|
|
|
|
; Plain 1D sample. The intrinsic is called with a first operand of i32 15 and
; all trailing control operands zero, and its <4 x float> result is returned
; directly. Every run line expects a single image_sample with dmask:0xf
; (plus dim:SQ_RSRC_IMG_1D on the gfx1010/gfx1100 runs) followed by
; s_waitcnt vmcnt(0) before the epilog.
define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 1D sample through the struct-returning form of the intrinsic, with the
; second-to-last i32 operand set to 1. The expected instruction carries the
; tfe modifier and a five-register destination (v[0:4]) that is zeroed before
; the sample. The i32 member of the result is stored to %out and the vector
; member is returned.
; After that store the verde run expects s_waitcnt vmcnt(0) expcnt(0), the
; gfx900 run expects s_waitcnt vmcnt(0), and the gfx1010/gfx1100 runs expect
; no wait at all before the epilog.
define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
; VERDE-LABEL: sample_1d_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[14:15], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v5, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: v_mov_b32_e32 v4, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[14:15]
; VERDE-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
; VERDE-NEXT: s_mov_b32 s15, 0xf000
; VERDE-NEXT: s_mov_b32 s14, -1
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: buffer_store_dword v4, off, s[12:15], 0
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[14:15], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v6, 0
; GFX6789-NEXT: v_mov_b32_e32 v5, v0
; GFX6789-NEXT: v_mov_b32_e32 v7, v6
; GFX6789-NEXT: v_mov_b32_e32 v8, v6
; GFX6789-NEXT: v_mov_b32_e32 v9, v6
; GFX6789-NEXT: v_mov_b32_e32 v10, v6
; GFX6789-NEXT: v_mov_b32_e32 v0, v6
; GFX6789-NEXT: v_mov_b32_e32 v1, v7
; GFX6789-NEXT: v_mov_b32_e32 v2, v8
; GFX6789-NEXT: v_mov_b32_e32 v3, v9
; GFX6789-NEXT: v_mov_b32_e32 v4, v10
; GFX6789-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX6789-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: global_store_dword v6, v4, s[12:13]
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s14, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_mov_b32_e32 v6, 0
; GFX10-NEXT: v_mov_b32_e32 v5, v0
; GFX10-NEXT: v_mov_b32_e32 v7, v6
; GFX10-NEXT: v_mov_b32_e32 v8, v6
; GFX10-NEXT: v_mov_b32_e32 v9, v6
; GFX10-NEXT: v_mov_b32_e32 v10, v6
; GFX10-NEXT: v_mov_b32_e32 v0, v6
; GFX10-NEXT: v_mov_b32_e32 v1, v7
; GFX10-NEXT: v_mov_b32_e32 v2, v8
; GFX10-NEXT: v_mov_b32_e32 v3, v9
; GFX10-NEXT: v_mov_b32_e32 v4, v10
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14
; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v6, v4, s[12:13]
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_1d_tfe:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s14, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0
; GFX11-NEXT: v_mov_b32_e32 v7, v6
; GFX11-NEXT: v_mov_b32_e32 v8, v6
; GFX11-NEXT: v_mov_b32_e32 v9, v6
; GFX11-NEXT: v_mov_b32_e32 v10, v6
; GFX11-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9
; GFX11-NEXT: v_mov_b32_e32 v4, v10
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s14
; GFX11-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v6, v4, s[12:13]
; GFX11-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}
|
|
|
|
; TFE sample where only element 0 of the sampled vector and the i32 status
; member are used. The call still passes i32 15 as its first operand, but the
; expected instruction has dmask:0x1 and a two-register destination v[0:1].
; The status word is bitcast to float and returned as element 1 of the result.
; Note that %out is declared in the signature but is not referenced in the body.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_1:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_1:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_1:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 0
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}
|
|
|
|
; TFE sample where only element 1 of the sampled vector and the i32 status
; member are used. The call passes i32 15 as its first operand, but the
; expected instruction has dmask:0x2 and a two-register destination v[0:1].
; The status word is bitcast to float and returned as element 1 of the result.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_2:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_2:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_2:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 1
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}
|
|
|
|
; TFE sample where only element 2 of the sampled vector and the i32 status
; member are used. The call passes i32 15 as its first operand, but the
; expected instruction has dmask:0x4 and a two-register destination v[0:1].
; The status word is bitcast to float and returned as element 1 of the result.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_3:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_3:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_3:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 2
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}
|
|
|
|
; TFE sample where only element 3 of the sampled vector and the i32 status
; member are used. The call passes i32 15 as its first operand, but the
; expected instruction has dmask:0x8 and a two-register destination v[0:1].
; The status word is bitcast to float and returned as element 1 of the result.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_4:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_4:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_4:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 3
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}
|
|
|
|
; TFE sample where elements 0 and 1 of the sampled vector plus the i32 status
; member are used. The call passes i32 15 as its first operand, but the
; expected instruction has dmask:0x3 and a three-register destination v[0:2].
; The two floats and the bitcast status word fill elements 0..2 of the
; returned <4 x float>; element 3 is left undef.
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_12:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_12:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v3, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_12:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_mov_b32_e32 v3, v0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, v0
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_1d_tfe_adjust_writemask_12:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0
; GFX11-NEXT: v_mov_b32_e32 v1, v0
; GFX11-NEXT: v_mov_b32_e32 v2, v0
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f1 = extractelement <4 x float> %res.vec, i32 0
  %res.f2 = extractelement <4 x float> %res.vec, i32 1
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
  %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
  ret <4 x float> %res
}
|
|
|
|
; TFE sample where elements 1 and 3 of the sampled vector plus the i32 status
; member are used. The call passes i32 15 as its first operand, but the
; expected instruction has dmask:0xa and a three-register destination v[0:2].
; The two floats and the bitcast status word fill elements 0..2 of the
; returned <4 x float>; element 3 is left undef.
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_24(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_24:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_24:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v3, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_24:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_mov_b32_e32 v3, v0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, v0
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_1d_tfe_adjust_writemask_24:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0
; GFX11-NEXT: v_mov_b32_e32 v1, v0
; GFX11-NEXT: v_mov_b32_e32 v2, v0
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f1 = extractelement <4 x float> %res.vec, i32 1
  %res.f2 = extractelement <4 x float> %res.vec, i32 3
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
  %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
  ret <4 x float> %res
}
|
|
|
|
; TFE sample where elements 0, 2 and 3 of the sampled vector plus the i32
; status member are used. The call passes i32 15 as its first operand, but the
; expected instruction has dmask:0xd and a four-register destination v[0:3].
; The three floats and the bitcast status word fill all four elements of the
; returned vector.
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_134(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_134:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v4, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_134:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v4, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v3, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_134:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: v_mov_b32_e32 v4, v0
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT: v_mov_b32_e32 v3, v0
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f1 = extractelement <4 x float> %res.vec, i32 0
  %res.f2 = extractelement <4 x float> %res.vec, i32 2
  %res.f3 = extractelement <4 x float> %res.vec, i32 3
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0
  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
  %res.tmp3 = insertelement <4 x float> %res.tmp2, float %res.f3, i32 2
  %res = insertelement <4 x float> %res.tmp3, float %res.errf, i32 3
  ret <4 x float> %res
}
|
|
|
|
; Same shape as the TFE store test, but the second-to-last i32 operand of the
; intrinsic call is 2 and the expected instruction carries the lwe modifier
; instead of tfe. The destination is still five registers (v[0:4]), zeroed
; before the sample; the i32 member of the result is stored to %out and the
; vector member is returned.
; After that store the verde run expects s_waitcnt vmcnt(0) expcnt(0), the
; gfx900 run expects s_waitcnt vmcnt(0), and the gfx1010/gfx1100 runs expect
; no wait at all before the epilog.
define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
; VERDE-LABEL: sample_1d_lwe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[14:15], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v5, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: v_mov_b32_e32 v4, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[14:15]
; VERDE-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
; VERDE-NEXT: s_mov_b32 s15, 0xf000
; VERDE-NEXT: s_mov_b32 s14, -1
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: buffer_store_dword v4, off, s[12:15], 0
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_lwe:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[14:15], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v6, 0
; GFX6789-NEXT: v_mov_b32_e32 v5, v0
; GFX6789-NEXT: v_mov_b32_e32 v7, v6
; GFX6789-NEXT: v_mov_b32_e32 v8, v6
; GFX6789-NEXT: v_mov_b32_e32 v9, v6
; GFX6789-NEXT: v_mov_b32_e32 v10, v6
; GFX6789-NEXT: v_mov_b32_e32 v0, v6
; GFX6789-NEXT: v_mov_b32_e32 v1, v7
; GFX6789-NEXT: v_mov_b32_e32 v2, v8
; GFX6789-NEXT: v_mov_b32_e32 v3, v9
; GFX6789-NEXT: v_mov_b32_e32 v4, v10
; GFX6789-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX6789-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: global_store_dword v6, v4, s[12:13]
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_lwe:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s14, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_mov_b32_e32 v6, 0
; GFX10-NEXT: v_mov_b32_e32 v5, v0
; GFX10-NEXT: v_mov_b32_e32 v7, v6
; GFX10-NEXT: v_mov_b32_e32 v8, v6
; GFX10-NEXT: v_mov_b32_e32 v9, v6
; GFX10-NEXT: v_mov_b32_e32 v10, v6
; GFX10-NEXT: v_mov_b32_e32 v0, v6
; GFX10-NEXT: v_mov_b32_e32 v1, v7
; GFX10-NEXT: v_mov_b32_e32 v2, v8
; GFX10-NEXT: v_mov_b32_e32 v3, v9
; GFX10-NEXT: v_mov_b32_e32 v4, v10
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14
; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v6, v4, s[12:13]
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_1d_lwe:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s14, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0
; GFX11-NEXT: v_mov_b32_e32 v7, v6
; GFX11-NEXT: v_mov_b32_e32 v8, v6
; GFX11-NEXT: v_mov_b32_e32 v9, v6
; GFX11-NEXT: v_mov_b32_e32 v10, v6
; GFX11-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9
; GFX11-NEXT: v_mov_b32_e32 v4, v10
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s14
; GFX11-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v6, v4, s[12:13]
; GFX11-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}
|
|
|
|
; Plain 2D sample with two float coordinates (%s, %t). The expected
; image_sample takes a two-register address v[0:1] with dmask:0xf, and the
; gfx1010/gfx1100 runs additionally expect dim:SQ_RSRC_IMG_2D. The result is
; returned after s_waitcnt vmcnt(0).
define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; VERDE-LABEL: sample_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Plain 3D sample with three float coordinates (%s, %t, %r). The expected
; image_sample takes a three-register address v[0:2] with dmask:0xf, and the
; gfx1010/gfx1100 runs additionally expect dim:SQ_RSRC_IMG_3D. The result is
; returned after s_waitcnt vmcnt(0).
define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
; VERDE-LABEL: sample_3d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_3d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_3d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Cube sample with coordinates (%s, %t) plus a %face operand. The expected
; image_sample takes a three-register address v[0:2] with dmask:0xf. The
; verde and gfx900 runs expect the da modifier, while the gfx1010/gfx1100
; runs expect dim:SQ_RSRC_IMG_CUBE instead. The result is returned after
; s_waitcnt vmcnt(0).
define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
; VERDE-LABEL: sample_cube:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_cube:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_cube:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.1darray.v4f32.f32 with address operands
; (%s, %slice) and a leading i32 15 (dmask:0xf in the expected output).
; Every run expects the s_mov/s_wqm/s_and sequence on exec, one image_sample
; reading v[0:1], and s_waitcnt vmcnt(0) before the return. The verde and
; gfx900 runs print the da modifier; the gfx1010/gfx1100 runs print
; dim:SQ_RSRC_IMG_1D_ARRAY instead.
define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
|
|
; VERDE-LABEL: sample_1darray:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_1darray:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_1darray:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.2darray.v4f32.f32 with address operands
; (%s, %t, %slice) and a leading i32 15 (dmask:0xf in the expected output).
; Every run expects the s_mov/s_wqm/s_and sequence on exec, one image_sample
; reading v[0:2], and s_waitcnt vmcnt(0) before the return. The verde and
; gfx900 runs print the da modifier; the gfx1010/gfx1100 runs print
; dim:SQ_RSRC_IMG_2D_ARRAY instead.
define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
|
|
; VERDE-LABEL: sample_2darray:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_2darray:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_2darray:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.c.1d.v4f32.f32 with address operands
; (%zcompare, %s) and a leading i32 15 (dmask:0xf in the expected output).
; Every run expects the s_mov/s_wqm/s_and sequence on exec, one image_sample_c
; reading v[0:1], and s_waitcnt vmcnt(0) before the return. The
; gfx1010/gfx1100 runs additionally print dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
|
|
; VERDE-LABEL: sample_c_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_1d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.c.2d.v4f32.f32 with address operands
; (%zcompare, %s, %t) and a leading i32 15 (dmask:0xf in the expected output).
; Every run expects the s_mov/s_wqm/s_and sequence on exec, one image_sample_c
; reading v[0:2], and s_waitcnt vmcnt(0) before the return. The
; gfx1010/gfx1100 runs additionally print dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
|
|
; VERDE-LABEL: sample_c_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_2d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.cl.1d.v4f32.f32 with address operands
; (%s, %clamp) and a leading i32 15 (dmask:0xf in the expected output).
; Every run expects the s_mov/s_wqm/s_and sequence on exec, one
; image_sample_cl reading v[0:1], and s_waitcnt vmcnt(0) before the return.
; The gfx1010/gfx1100 runs additionally print dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) {
|
|
; VERDE-LABEL: sample_cl_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_cl_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_cl_1d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.cl.2d.v4f32.f32 with address operands
; (%s, %t, %clamp) and a leading i32 15 (dmask:0xf in the expected output).
; Every run expects the s_mov/s_wqm/s_and sequence on exec, one
; image_sample_cl reading v[0:2], and s_waitcnt vmcnt(0) before the return.
; The gfx1010/gfx1100 runs additionally print dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
|
|
; VERDE-LABEL: sample_cl_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_cl_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_cl_2d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32 with address operands
; (%zcompare, %s, %clamp) and a leading i32 15 (dmask:0xf in the expected
; output). Every run expects the s_mov/s_wqm/s_and sequence on exec, one
; image_sample_c_cl reading v[0:2], and s_waitcnt vmcnt(0) before the return.
; The gfx1010/gfx1100 runs additionally print dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) {
|
|
; VERDE-LABEL: sample_c_cl_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_cl_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_cl_1d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32 with address operands
; (%zcompare, %s, %t, %clamp) and a leading i32 15 (dmask:0xf in the expected
; output). Every run expects the s_mov/s_wqm/s_and sequence on exec, one
; image_sample_c_cl reading v[0:3], and s_waitcnt vmcnt(0) before the return.
; The gfx1010/gfx1100 runs additionally print dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
|
|
; VERDE-LABEL: sample_c_cl_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_cl_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_cl_2d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32 with address operands
; (%bias, %s) and a leading i32 15 (dmask:0xf in the expected output).
; Every run expects the s_mov/s_wqm/s_and sequence on exec, one image_sample_b
; reading v[0:1], and s_waitcnt vmcnt(0) before the return. The
; gfx1010/gfx1100 runs additionally print dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) {
|
|
; VERDE-LABEL: sample_b_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_b_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_b_1d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32 with address operands
; (%bias, %s, %t) and a leading i32 15 (dmask:0xf in the expected output).
; Every run expects the s_mov/s_wqm/s_and sequence on exec, one image_sample_b
; reading v[0:2], and s_waitcnt vmcnt(0) before the return. The
; gfx1010/gfx1100 runs additionally print dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
|
|
; VERDE-LABEL: sample_b_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_b_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_b_2d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32 with address operands
; (%bias, %zcompare, %s) and a leading i32 15 (dmask:0xf in the expected
; output). Every run expects the s_mov/s_wqm/s_and sequence on exec, one
; image_sample_c_b reading v[0:2], and s_waitcnt vmcnt(0) before the return.
; The gfx1010/gfx1100 runs additionally print dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) {
|
|
; VERDE-LABEL: sample_c_b_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_b_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_b_1d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32 with address operands
; (%bias, %zcompare, %s, %t) and a leading i32 15 (dmask:0xf in the expected
; output). Every run expects the s_mov/s_wqm/s_and sequence on exec, one
; image_sample_c_b reading v[0:3], and s_waitcnt vmcnt(0) before the return.
; The gfx1010/gfx1100 runs additionally print dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
|
|
; VERDE-LABEL: sample_c_b_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_b_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_b_2d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32 with address operands
; (%bias, %s, %clamp) and a leading i32 15 (dmask:0xf in the expected output).
; Every run expects the s_mov/s_wqm/s_and sequence on exec, one
; image_sample_b_cl reading v[0:2], and s_waitcnt vmcnt(0) before the return.
; The gfx1010/gfx1100 runs additionally print dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) {
|
|
; VERDE-LABEL: sample_b_cl_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_b_cl_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_b_cl_1d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32 with address operands
; (%bias, %s, %t, %clamp) and a leading i32 15 (dmask:0xf in the expected
; output). Every run expects the s_mov/s_wqm/s_and sequence on exec, one
; image_sample_b_cl reading v[0:3], and s_waitcnt vmcnt(0) before the return.
; The gfx1010/gfx1100 runs additionally print dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
|
|
; VERDE-LABEL: sample_b_cl_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_b_cl_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_b_cl_2d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32 with address
; operands (%bias, %zcompare, %s, %clamp) and a leading i32 15 (dmask:0xf in
; the expected output). Every run expects the s_mov/s_wqm/s_and sequence on
; exec, one image_sample_c_b_cl reading v[0:3], and s_waitcnt vmcnt(0) before
; the return. The gfx1010/gfx1100 runs additionally print dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) {
|
|
; VERDE-LABEL: sample_c_b_cl_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_b_cl_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_b_cl_1d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32 with address
; operands (%bias, %zcompare, %s, %t, %clamp) and a leading i32 15 (dmask:0xf
; in the expected output). Every run expects the s_mov/s_wqm/s_and sequence on
; exec, one image_sample_c_b_cl reading v[0:4], and s_waitcnt vmcnt(0) before
; the return. The gfx1010/gfx1100 runs additionally print dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
|
|
; VERDE-LABEL: sample_c_b_cl_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_b_cl_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_b_cl_2d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32 with address operands
; (%dsdh, %dsdv, %s) and a leading i32 15 (dmask:0xf in the expected output).
; The expected output has no s_wqm/exec sequence: image_sample_d reading
; v[0:2] is the first instruction, followed by s_waitcnt vmcnt(0). The
; gfx1010/gfx1100 runs additionally print dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
|
|
; VERDE-LABEL: sample_d_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_d_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_d_1d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32 with address operands
; (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t) and a leading i32 15 (dmask:0xf in the
; expected output). The expected output has no s_wqm/exec sequence:
; image_sample_d reading v[0:5] is the first instruction, followed by
; s_waitcnt vmcnt(0). The gfx1010/gfx1100 runs additionally print
; dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
|
|
; VERDE-LABEL: sample_d_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_d_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_d_2d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32 with address operands
; (%zcompare, %dsdh, %dsdv, %s) and a leading i32 15 (dmask:0xf in the
; expected output). The expected output has no s_wqm/exec sequence:
; image_sample_c_d reading v[0:3] is the first instruction, followed by
; s_waitcnt vmcnt(0). The gfx1010/gfx1100 runs additionally print
; dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
|
|
; VERDE-LABEL: sample_c_d_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_d_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_d_1d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32 with address operands
; (%zcompare, %dsdh, %dtdh, %dsdv, %dtdv, %s, %t) and a leading i32 15
; (dmask:0xf in the expected output). The expected output has no s_wqm/exec
; sequence: image_sample_c_d reading v[0:6] is the first instruction,
; followed by s_waitcnt vmcnt(0). The gfx1010/gfx1100 runs additionally print
; dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
|
|
; VERDE-LABEL: sample_c_d_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_d_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_d_2d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32 with address operands
; (%dsdh, %dsdv, %s, %clamp) and a leading i32 15 (dmask:0xf in the expected
; output). The expected output has no s_wqm/exec sequence: image_sample_d_cl
; reading v[0:3] is the first instruction, followed by s_waitcnt vmcnt(0).
; The gfx1010/gfx1100 runs additionally print dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
|
|
; VERDE-LABEL: sample_d_cl_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_d_cl_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_d_cl_1d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32 with address operands
; (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp) and a leading i32 15
; (dmask:0xf in the expected output). The expected output has no s_wqm/exec
; sequence: image_sample_d_cl reading v[0:6] is the first instruction,
; followed by s_waitcnt vmcnt(0). The gfx1010/gfx1100 runs additionally print
; dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
|
|
; VERDE-LABEL: sample_d_cl_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_d_cl_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_d_cl_2d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Covers llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32 with address
; operands (%zcompare, %dsdh, %dsdv, %s, %clamp) and a leading i32 15
; (dmask:0xf in the expected output). The expected output has no s_wqm/exec
; sequence: image_sample_c_d_cl reading v[0:4] is the first instruction,
; followed by s_waitcnt vmcnt(0). The gfx1010/gfx1100 runs additionally print
; dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
|
|
; VERDE-LABEL: sample_c_d_cl_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_d_cl_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_d_cl_1d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; 2D variant of image.sample.c.d.cl: %zcompare, four derivative operands, two coordinates
; and %clamp, with all four channels requested (dmask 15). The eight floats are expected
; in the single range v[0:7] on every checked target.
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
|
|
; VERDE-LABEL: sample_c_d_cl_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_d_cl_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_d_cl_2d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; 1D variant of image.sample.l: one coordinate plus a %lod operand, all four channels
; requested (dmask 15). The two floats are expected in v[0:1] on every checked target.
define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
|
|
; VERDE-LABEL: sample_l_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_l_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_l_1d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; 2D variant of image.sample.l: two coordinates plus a %lod operand, all four channels
; requested (dmask 15). The three floats are expected in v[0:2] on every checked target.
define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
|
|
; VERDE-LABEL: sample_l_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_l_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_l_2d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; 1D variant of image.sample.c.l: %zcompare, one coordinate and %lod, all four channels
; requested (dmask 15). The three floats are expected in v[0:2] on every checked target.
define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
|
|
; VERDE-LABEL: sample_c_l_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_l_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_l_1d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; 2D variant of image.sample.c.l: %zcompare, two coordinates and %lod, all four channels
; requested (dmask 15). The four floats are expected in v[0:3] on every checked target.
define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
|
|
; VERDE-LABEL: sample_c_l_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_l_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_l_2d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; 1D variant of image.sample.lz: a single coordinate and no LOD operand, all four channels
; requested (dmask 15). The coordinate is expected in the lone register v0.
define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: sample_lz_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_lz_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_lz_1d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; 2D variant of image.sample.lz: two coordinates and no LOD operand, all four channels
; requested (dmask 15). The coordinates are expected in v[0:1] on every checked target.
define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
|
|
; VERDE-LABEL: sample_lz_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_lz_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_lz_2d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; 1D variant of image.sample.c.lz: %zcompare and one coordinate, all four channels
; requested (dmask 15). The two floats are expected in v[0:1] on every checked target.
define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
|
|
; VERDE-LABEL: sample_c_lz_1d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_lz_1d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_lz_1d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; 2D variant of image.sample.c.lz: %zcompare and two coordinates, all four channels
; requested (dmask 15). The three floats are expected in v[0:2] on every checked target.
define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
|
|
; VERDE-LABEL: sample_c_lz_2d:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_lz_2d:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_lz_2d:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Single-channel result from image.sample.c.d.o on a 2D array: the intrinsic is called with
; dmask 4 and returns one float, so the expected destination is the lone register v0.
; The nine address operands (%offset through %slice) are expected in v[0:8]. VERDE and
; GFX6789 print the `da` modifier; GFX10PLUS prints dim:SQ_RSRC_IMG_2D_ARRAY instead.
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
|
|
; VERDE-LABEL: sample_c_d_o_2darray_V1:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 da
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_d_o_2darray_V1:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 da
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_d_o_2darray_V1:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret float %v
|
|
}
|
|
|
|
; TFE form of the single-channel 2D-array sample: the intrinsic returns {float, i32} and is
; called with 1 as its second-to-last operand, and every target prints the `tfe` modifier.
; The destination register pair is zero-initialised before the sample on all targets.
; The float half is returned and the i32 half is stored to %out. VERDE and GFX6789 are
; expected to wait again after that store, whereas the separate GFX10 and GFX11
; expectations go straight from the store to the return.
define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, ptr addrspace(1) inreg %out) {
|
|
; VERDE-LABEL: sample_c_d_o_2darray_V1_tfe:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: v_mov_b32_e32 v9, 0
|
|
; VERDE-NEXT: v_mov_b32_e32 v10, v9
|
|
; VERDE-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 tfe da
|
|
; VERDE-NEXT: s_mov_b32 s15, 0xf000
|
|
; VERDE-NEXT: s_mov_b32 s14, -1
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: v_mov_b32_e32 v0, v9
|
|
; VERDE-NEXT: buffer_store_dword v10, off, s[12:15], 0
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_d_o_2darray_V1_tfe:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: v_mov_b32_e32 v11, 0
|
|
; GFX6789-NEXT: v_mov_b32_e32 v12, v11
|
|
; GFX6789-NEXT: v_mov_b32_e32 v9, v11
|
|
; GFX6789-NEXT: v_mov_b32_e32 v10, v12
|
|
; GFX6789-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 tfe da
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: v_mov_b32_e32 v0, v9
|
|
; GFX6789-NEXT: global_store_dword v11, v10, s[12:13]
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_mov_b32_e32 v11, 0
|
|
; GFX10-NEXT: v_mov_b32_e32 v12, v11
|
|
; GFX10-NEXT: v_mov_b32_e32 v9, v11
|
|
; GFX10-NEXT: v_mov_b32_e32 v10, v12
|
|
; GFX10-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: v_mov_b32_e32 v0, v9
|
|
; GFX10-NEXT: global_store_dword v11, v10, s[12:13]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX11-LABEL: sample_c_d_o_2darray_V1_tfe:
|
|
; GFX11: ; %bb.0: ; %main_body
|
|
; GFX11-NEXT: v_mov_b32_e32 v11, 0
|
|
; GFX11-NEXT: v_dual_mov_b32 v9, v1 :: v_dual_mov_b32 v10, v0
|
|
; GFX11-NEXT: v_mov_b32_e32 v12, v11
|
|
; GFX11-NEXT: v_dual_mov_b32 v0, v11 :: v_dual_mov_b32 v1, v12
|
|
; GFX11-NEXT: image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v[4:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b32 v11, v1, s[12:13]
|
|
; GFX11-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
|
|
%v.vec = extractvalue {float, i32} %v, 0
|
|
%v.err = extractvalue {float, i32} %v, 1
|
|
store i32 %v.err, ptr addrspace(1) %out, align 4
|
|
ret float %v.vec
|
|
}
|
|
|
|
; Two-channel result from image.sample.c.d.o on a 2D array: the intrinsic is called with
; dmask 6 and returns <2 x float>, so the expected destination is the pair v[0:1].
; The nine address operands are expected in v[0:8] on every checked target.
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
|
|
; VERDE-LABEL: sample_c_d_o_2darray_V2:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 da
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_d_o_2darray_V2:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 da
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_c_d_o_2darray_V2:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <2 x float> %v
|
|
}
|
|
|
|
; TFE form of the two-channel 2D-array sample: the intrinsic returns {<2 x float>, i32} and
; is called with 1 as its second-to-last operand, and every target prints the `tfe` modifier.
; The three destination registers are zero-initialised before the sample on all targets.
; The two floats and the bitcast i32 are packed into lanes 0-2 of the returned
; <4 x float>; lane 3 is never written, so the vector is built from a poison placeholder
; rather than the deprecated undef.
define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
|
|
; VERDE-LABEL: sample_c_d_o_2darray_V2_tfe:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: v_mov_b32_e32 v9, 0
|
|
; VERDE-NEXT: v_mov_b32_e32 v10, v9
|
|
; VERDE-NEXT: v_mov_b32_e32 v11, v9
|
|
; VERDE-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 tfe da
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: v_mov_b32_e32 v0, v9
|
|
; VERDE-NEXT: v_mov_b32_e32 v1, v10
|
|
; VERDE-NEXT: v_mov_b32_e32 v2, v11
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_c_d_o_2darray_V2_tfe:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: v_mov_b32_e32 v9, 0
|
|
; GFX6789-NEXT: v_mov_b32_e32 v10, v9
|
|
; GFX6789-NEXT: v_mov_b32_e32 v11, v9
|
|
; GFX6789-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 tfe da
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: v_mov_b32_e32 v0, v9
|
|
; GFX6789-NEXT: v_mov_b32_e32 v1, v10
|
|
; GFX6789-NEXT: v_mov_b32_e32 v2, v11
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_c_d_o_2darray_V2_tfe:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: v_mov_b32_e32 v9, 0
|
|
; GFX10-NEXT: v_mov_b32_e32 v10, v9
|
|
; GFX10-NEXT: v_mov_b32_e32 v11, v9
|
|
; GFX10-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: v_mov_b32_e32 v0, v9
|
|
; GFX10-NEXT: v_mov_b32_e32 v1, v10
|
|
; GFX10-NEXT: v_mov_b32_e32 v2, v11
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX11-LABEL: sample_c_d_o_2darray_V2_tfe:
|
|
; GFX11: ; %bb.0: ; %main_body
|
|
; GFX11-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_mov_b32 v0, 0
|
|
; GFX11-NEXT: v_dual_mov_b32 v9, v2 :: v_dual_mov_b32 v10, v1
|
|
; GFX11-NEXT: v_mov_b32_e32 v1, v0
|
|
; GFX11-NEXT: v_mov_b32_e32 v2, v0
|
|
; GFX11-NEXT: image_sample_c_d_o v[0:2], [v11, v10, v9, v3, v[4:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
|
|
%v.vec = extractvalue {<2 x float>, i32} %v, 0
|
|
%v.f1 = extractelement <2 x float> %v.vec, i32 0
|
|
%v.f2 = extractelement <2 x float> %v.vec, i32 1
|
|
%v.err = extractvalue {<2 x float>, i32} %v, 1
|
|
%v.errf = bitcast i32 %v.err to float
|
|
%res.0 = insertelement <4 x float> poison, float %v.f1, i32 0
|
|
%res.1 = insertelement <4 x float> %res.0, float %v.f2, i32 1
|
|
%res.2 = insertelement <4 x float> %res.1, float %v.errf, i32 2
|
|
ret <4 x float> %res.2
|
|
}
|
|
|
|
; Plain 1D sample with the i1 operand of the intrinsic set to 1: every checked target is
; expected to print the `unorm` modifier on image_sample. The exec save, s_wqm and
; exec restore sequence is expected ahead of the sample on all targets.
define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: sample_1d_unorm:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_1d_unorm:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_1d_unorm:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Plain 1D sample with the final i32 operand of the intrinsic set to 1: every checked
; target is expected to print the `glc` modifier on image_sample.
define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: sample_1d_glc:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_1d_glc:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_1d_glc:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Plain 1D sample with the final i32 operand of the intrinsic set to 2: every checked
; target is expected to print the `slc` modifier on image_sample.
define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: sample_1d_slc:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_1d_slc:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_1d_slc:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D slc
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 2)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; Plain 1D sample with the final i32 operand of the intrinsic set to 3: every checked
; target is expected to print both the `glc` and `slc` modifiers on image_sample.
define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: sample_1d_glc_slc:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_1d_glc_slc:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: sample_1d_glc_slc:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc slc
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 3)
|
|
ret <4 x float> %v
|
|
}
|
|
|
|
; The IR requests all four channels (dmask 15) but only element 0 of the result is used.
; Every checked target is expected to narrow the instruction to dmask:0x1 with the
; single destination register v0.
define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_0:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_0:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: adjust_writemask_sample_0:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%elt0 = extractelement <4 x float> %r, i32 0
|
|
ret float %elt0
|
|
}
|
|
|
|
; The IR requests all four channels (dmask 15) but only elements 0 and 1 are kept by the
; shuffle. Every checked target is expected to narrow the instruction to dmask:0x3 with
; the destination pair v[0:1]. The shuffle mask never selects from its second operand,
; which is therefore spelled as poison rather than the deprecated undef.
define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_01:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_01:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: adjust_writemask_sample_01:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 0, i32 1>
|
|
ret <2 x float> %out
|
|
}
|
|
|
|
; The IR requests all four channels (dmask 15) but only elements 0, 1 and 2 are kept by
; the shuffle. Every checked target is expected to narrow the instruction to dmask:0x7
; with the destination range v[0:2]. The shuffle mask never selects from its second
; operand, which is therefore spelled as poison rather than the deprecated undef.
define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_012:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_012:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: adjust_writemask_sample_012:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%out = shufflevector <4 x float> %r, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
|
|
ret <3 x float> %out
|
|
}
|
|
|
|
; The IR requests all four channels (dmask 15) but only elements 1 and 2 are kept by the
; shuffle. Every checked target is expected to narrow the instruction to dmask:0x6 with
; the destination pair v[0:1]. The shuffle mask never selects from its second operand,
; which is therefore spelled as poison rather than the deprecated undef.
define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_12:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_12:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: adjust_writemask_sample_12:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 1, i32 2>
|
|
ret <2 x float> %out
|
|
}
|
|
|
|
; The IR requests all four channels (dmask 15) but only elements 0 and 3 are kept by the
; shuffle. Every checked target is expected to narrow the instruction to dmask:0x9 with
; the destination pair v[0:1]. The shuffle mask never selects from its second operand,
; which is therefore spelled as poison rather than the deprecated undef.
define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_03:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_03:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: adjust_writemask_sample_03:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 0, i32 3>
|
|
ret <2 x float> %out
|
|
}
|
|
|
|
; adjust_writemask_sample_13: same 1D sample with mask operand 15, keeping only
; elements 1 and 3 of the result. Every check prefix expects dmask:0xa (bits 1
; and 3) and a two-register destination v[0:1].
define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_13:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_13:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: adjust_writemask_sample_13:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3>
|
|
ret <2 x float> %out
|
|
}
|
|
|
|
; adjust_writemask_sample_123: same 1D sample with mask operand 15, keeping
; elements 1, 2 and 3 of the result (only element 0 is dropped). Every check
; prefix expects dmask:0xe (bits 1-3) and a three-register destination v[0:2].
define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_123:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_123:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: adjust_writemask_sample_123:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
|
|
ret <3 x float> %out
|
|
}
|
|
|
|
; adjust_writemask_sample_none_enabled: the 1D sample is called with a leading
; mask operand of 0 and its full <4 x float> result is returned unchanged.
; For every check prefix the entry block is expected to contain nothing before
; the epilog marker, i.e. no image_sample (and no exec/WQM setup or s_waitcnt)
; is emitted at all.
define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_none_enabled:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_none_enabled:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: adjust_writemask_sample_none_enabled:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
ret <4 x float> %r
|
|
}
|
|
|
|
; adjust_writemask_sample_123_to_12: here the call already starts from a
; partial mask operand, 14 (0xe, bits 1-3), and only elements 0 and 1 of the
; returned vector are kept. Every check prefix expects the mask to be narrowed
; to dmask:0x6 (bits 1 and 2) with a two-register destination v[0:1].
define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_123_to_12:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_123_to_12:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: adjust_writemask_sample_123_to_12:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
|
ret <2 x float> %out
|
|
}
|
|
|
|
; adjust_writemask_sample_013_to_13: the call starts from mask operand 11
; (0xb, bits 0, 1 and 3) and only elements 1 and 2 of the returned vector are
; kept. Every check prefix expects the mask to be narrowed to dmask:0xa (bits 1
; and 3) with a two-register destination v[0:1].
define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
|
|
; VERDE-LABEL: adjust_writemask_sample_013_to_13:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[12:13], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: adjust_writemask_sample_013_to_13:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
|
|
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: adjust_writemask_sample_013_to_13:
|
|
; GFX10PLUS: ; %bb.0: ; %main_body
|
|
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
|
|
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
|
|
; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
|
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
|
|
ret <2 x float> %out
|
|
}
|
|
|
|
; Declarations of the llvm.amdgcn.image.sample.* intrinsics called by the tests
; in this file. Every declaration shares the same operand tail
; (<8 x i32>, <4 x i32>, i1, i32, i32) and carries attribute group #1.
;
; Plain sample, one declaration per image dimension. The v4f32i32 form returns
; an extra i32 alongside the <4 x float> result.
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
; sample.c, sample.cl and sample.c.cl variants (1d and 2d); each modifier adds
; one float operand compared with the plain form.
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
; sample.b family (b, c.b, b.cl, c.b.cl), 1d and 2d. These names carry two
; .f32 type suffixes after the result type.
declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
; sample.d family (d, c.d, d.cl, c.d.cl), 1d and 2d.
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
; sample.l and sample.c.l variants, 1d and 2d.
declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
; sample.lz and sample.c.lz variants, 1d and 2d.
declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
; sample.c.d.o on 2darray with narrower result types: float and <2 x float>,
; each also declared in a form that returns an extra i32 in a struct. These
; take a second leading i32 operand that the declarations above do not have.
declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare {float, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
|
|
; Attribute groups. #1 (nounwind readonly) is the group attached to every
; image.sample intrinsic declaration in this file's declaration list.
; NOTE(review): #0 and #2 are not referenced by those declarations - confirm
; whether anything else in the file still uses them.
attributes #0 = { nounwind }
|
|
attributes #1 = { nounwind readonly }
|
|
attributes #2 = { nounwind readnone }
|