The dot is too confusing for tools. Output temporaries would contain '10.3-generic', which tools could parse as a file extension; the device libs and the associated clang driver logic are also confused by the dot. After discussion, we decided it is better to simply remove the '.' from the target name than to fix each issue one by one.
2767 lines
128 KiB
LLVM
2767 lines
128 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
|
|
|
|
; Basic 1D image sample: dmask 15 and a zero second-to-last i32 operand, so no
; tfe/lwe modifier is expected on the instruction. The checks expect exec to be
; saved, s_wqm applied, and exec restored before the image_sample is issued.
define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; 1D sample with the second-to-last i32 operand set to 1, which the checks show
; as the `tfe` modifier on image_sample. The intrinsic returns
; {<4 x float>, i32}: the i32 status word is stored to %out and the vector is
; returned. The checks expect the five destination VGPRs v[0:4] to be
; zero-initialised before the sample is issued.
define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
; VERDE-LABEL: sample_1d_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[14:15], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v5, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: v_mov_b32_e32 v4, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[14:15]
; VERDE-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
; VERDE-NEXT: s_mov_b32 s15, 0xf000
; VERDE-NEXT: s_mov_b32 s14, -1
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: buffer_store_dword v4, off, s[12:15], 0
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[14:15], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v6, 0
; GFX6789-NEXT: v_mov_b32_e32 v5, v0
; GFX6789-NEXT: v_mov_b32_e32 v7, v6
; GFX6789-NEXT: v_mov_b32_e32 v8, v6
; GFX6789-NEXT: v_mov_b32_e32 v9, v6
; GFX6789-NEXT: v_mov_b32_e32 v10, v6
; GFX6789-NEXT: v_mov_b32_e32 v0, v6
; GFX6789-NEXT: v_mov_b32_e32 v1, v7
; GFX6789-NEXT: v_mov_b32_e32 v2, v8
; GFX6789-NEXT: v_mov_b32_e32 v3, v9
; GFX6789-NEXT: v_mov_b32_e32 v4, v10
; GFX6789-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX6789-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: global_store_dword v6, v4, s[12:13]
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s14, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_mov_b32_e32 v6, 0
; GFX10-NEXT: v_mov_b32_e32 v5, v0
; GFX10-NEXT: v_mov_b32_e32 v7, v6
; GFX10-NEXT: v_mov_b32_e32 v8, v6
; GFX10-NEXT: v_mov_b32_e32 v9, v6
; GFX10-NEXT: v_mov_b32_e32 v10, v6
; GFX10-NEXT: v_mov_b32_e32 v0, v6
; GFX10-NEXT: v_mov_b32_e32 v1, v7
; GFX10-NEXT: v_mov_b32_e32 v2, v8
; GFX10-NEXT: v_mov_b32_e32 v3, v9
; GFX10-NEXT: v_mov_b32_e32 v4, v10
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14
; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v6, v4, s[12:13]
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_1d_tfe:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s14, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0
; GFX11-NEXT: v_mov_b32_e32 v7, v6
; GFX11-NEXT: v_mov_b32_e32 v8, v6
; GFX11-NEXT: v_mov_b32_e32 v9, v6
; GFX11-NEXT: v_mov_b32_e32 v10, v6
; GFX11-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9
; GFX11-NEXT: v_mov_b32_e32 v4, v10
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s14
; GFX11-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v6, v4, s[12:13]
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_1d_tfe:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s14, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0
; GFX12-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6
; GFX12-NEXT: v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6
; GFX12-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
; GFX12-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9
; GFX12-NEXT: v_mov_b32_e32 v4, v10
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s14
; GFX12-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: global_store_b32 v6, v4, s[12:13]
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}
|
|
|
|
; TFE sample where only element 0 of the sampled vector and the i32 status word
; are used. Although the call passes dmask 15, the checks expect the emitted
; instruction to use dmask 0x1 with a two-register result v[0:1].
; Note that %out is not referenced in the body.
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_1:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_1:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_1:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_1d_tfe_adjust_writemask_1:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_mov_b32_e32 v2, v0
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: v_mov_b32_e32 v1, v0
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 0
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  ; Both lanes are overwritten below, so the poison placeholder never escapes.
  %res.tmp = insertelement <2 x float> poison, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}
|
|
|
|
; TFE sample where only element 1 of the sampled vector and the i32 status word
; are used. Although the call passes dmask 15, the checks expect the emitted
; instruction to use dmask 0x2 with a two-register result v[0:1].
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_2:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_2:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_2:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_1d_tfe_adjust_writemask_2:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_mov_b32_e32 v2, v0
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: v_mov_b32_e32 v1, v0
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 1
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  ; Both lanes are overwritten below, so the poison placeholder never escapes.
  %res.tmp = insertelement <2 x float> poison, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}
|
|
|
|
; TFE sample where only element 2 of the sampled vector and the i32 status word
; are used. Although the call passes dmask 15, the checks expect the emitted
; instruction to use dmask 0x4 with a two-register result v[0:1].
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_3:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_3:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_3:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_1d_tfe_adjust_writemask_3:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_mov_b32_e32 v2, v0
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: v_mov_b32_e32 v1, v0
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 2
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  ; Both lanes are overwritten below, so the poison placeholder never escapes.
  %res.tmp = insertelement <2 x float> poison, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}
|
|
|
|
; TFE sample where only element 3 of the sampled vector and the i32 status word
; are used. Although the call passes dmask 15, the checks expect the emitted
; instruction to use dmask 0x8 with a two-register result v[0:1].
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_4:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_4:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_4:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_1d_tfe_adjust_writemask_4:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_mov_b32_e32 v2, v0
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: v_mov_b32_e32 v1, v0
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f = extractelement <4 x float> %res.vec, i32 3
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  ; Both lanes are overwritten below, so the poison placeholder never escapes.
  %res.tmp = insertelement <2 x float> poison, float %res.f, i32 0
  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
  ret <2 x float> %res
}
|
|
|
|
; TFE sample where elements 0 and 1 of the sampled vector plus the i32 status
; word are used. Although the call passes dmask 15, the checks expect the
; emitted instruction to use dmask 0x3 with a three-register result v[0:2].
; Lane 3 of the returned vector is never written and stays poison.
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_12:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_12:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v3, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_12:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_mov_b32_e32 v3, v0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, v0
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_1d_tfe_adjust_writemask_12:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0
; GFX11-NEXT: v_mov_b32_e32 v1, v0
; GFX11-NEXT: v_mov_b32_e32 v2, v0
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_1d_tfe_adjust_writemask_12:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0
; GFX12-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, v0
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f1 = extractelement <4 x float> %res.vec, i32 0
  %res.f2 = extractelement <4 x float> %res.vec, i32 1
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  ; Pack the two used channels followed by the status word into lanes 0..2.
  %res.tmp1 = insertelement <4 x float> poison, float %res.f1, i32 0
  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
  %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
  ret <4 x float> %res
}
|
|
|
|
; TFE sample where elements 1 and 3 of the sampled vector plus the i32 status
; word are used. Although the call passes dmask 15, the checks expect the
; emitted instruction to use dmask 0xa with a three-register result v[0:2].
; Lane 3 of the returned vector is never written and stays poison.
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_24(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_24:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_24:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v3, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_1d_tfe_adjust_writemask_24:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s12, exec_lo
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: v_mov_b32_e32 v3, v0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, v0
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_1d_tfe_adjust_writemask_24:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_mov_b32 s12, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0
; GFX11-NEXT: v_mov_b32_e32 v1, v0
; GFX11-NEXT: v_mov_b32_e32 v2, v0
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX11-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_1d_tfe_adjust_writemask_24:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0
; GFX12-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, v0
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f1 = extractelement <4 x float> %res.vec, i32 1
  %res.f2 = extractelement <4 x float> %res.vec, i32 3
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  ; Pack the two used channels followed by the status word into lanes 0..2.
  %res.tmp1 = insertelement <4 x float> poison, float %res.f1, i32 0
  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
  %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
  ret <4 x float> %res
}
|
|
|
|
; TFE sample where elements 0, 2 and 3 of the sampled vector plus the i32 status
; word are used. Although the call passes dmask 15, the checks expect the
; emitted instruction to use dmask 0xd with a four-register result v[0:3].
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_134(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_134:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: v_mov_b32_e32 v4, v0
; VERDE-NEXT: v_mov_b32_e32 v0, 0
; VERDE-NEXT: v_mov_b32_e32 v1, v0
; VERDE-NEXT: v_mov_b32_e32 v2, v0
; VERDE-NEXT: v_mov_b32_e32 v3, v0
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_134:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: v_mov_b32_e32 v4, v0
; GFX6789-NEXT: v_mov_b32_e32 v0, 0
; GFX6789-NEXT: v_mov_b32_e32 v1, v0
; GFX6789-NEXT: v_mov_b32_e32 v2, v0
; GFX6789-NEXT: v_mov_b32_e32 v3, v0
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_134:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: v_mov_b32_e32 v4, v0
; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0
; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0
; GFX10PLUS-NEXT: v_mov_b32_e32 v3, v0
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_1d_tfe_adjust_writemask_134:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_mov_b32_e32 v4, v0
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, v0
; GFX12-NEXT: v_mov_b32_e32 v3, v0
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %res.vec = extractvalue {<4 x float>,i32} %v, 0
  %res.f1 = extractelement <4 x float> %res.vec, i32 0
  %res.f2 = extractelement <4 x float> %res.vec, i32 2
  %res.f3 = extractelement <4 x float> %res.vec, i32 3
  %res.err = extractvalue {<4 x float>,i32} %v, 1
  %res.errf = bitcast i32 %res.err to float
  ; All four lanes are overwritten below, so the poison placeholder never escapes.
  %res.tmp1 = insertelement <4 x float> poison, float %res.f1, i32 0
  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
  %res.tmp3 = insertelement <4 x float> %res.tmp2, float %res.f3, i32 2
  %res = insertelement <4 x float> %res.tmp3, float %res.errf, i32 3
  ret <4 x float> %res
}
|
|
|
|
define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
|
|
; VERDE-LABEL: sample_1d_lwe:
|
|
; VERDE: ; %bb.0: ; %main_body
|
|
; VERDE-NEXT: s_mov_b64 s[14:15], exec
|
|
; VERDE-NEXT: s_wqm_b64 exec, exec
|
|
; VERDE-NEXT: v_mov_b32_e32 v5, v0
|
|
; VERDE-NEXT: v_mov_b32_e32 v0, 0
|
|
; VERDE-NEXT: v_mov_b32_e32 v1, v0
|
|
; VERDE-NEXT: v_mov_b32_e32 v2, v0
|
|
; VERDE-NEXT: v_mov_b32_e32 v3, v0
|
|
; VERDE-NEXT: v_mov_b32_e32 v4, v0
|
|
; VERDE-NEXT: s_and_b64 exec, exec, s[14:15]
|
|
; VERDE-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
|
|
; VERDE-NEXT: s_mov_b32 s15, 0xf000
|
|
; VERDE-NEXT: s_mov_b32 s14, -1
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0)
|
|
; VERDE-NEXT: buffer_store_dword v4, off, s[12:15], 0
|
|
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; VERDE-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX6789-LABEL: sample_1d_lwe:
|
|
; GFX6789: ; %bb.0: ; %main_body
|
|
; GFX6789-NEXT: s_mov_b64 s[14:15], exec
|
|
; GFX6789-NEXT: s_wqm_b64 exec, exec
|
|
; GFX6789-NEXT: v_mov_b32_e32 v6, 0
|
|
; GFX6789-NEXT: v_mov_b32_e32 v5, v0
|
|
; GFX6789-NEXT: v_mov_b32_e32 v7, v6
|
|
; GFX6789-NEXT: v_mov_b32_e32 v8, v6
|
|
; GFX6789-NEXT: v_mov_b32_e32 v9, v6
|
|
; GFX6789-NEXT: v_mov_b32_e32 v10, v6
|
|
; GFX6789-NEXT: v_mov_b32_e32 v0, v6
|
|
; GFX6789-NEXT: v_mov_b32_e32 v1, v7
|
|
; GFX6789-NEXT: v_mov_b32_e32 v2, v8
|
|
; GFX6789-NEXT: v_mov_b32_e32 v3, v9
|
|
; GFX6789-NEXT: v_mov_b32_e32 v4, v10
|
|
; GFX6789-NEXT: s_and_b64 exec, exec, s[14:15]
|
|
; GFX6789-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: global_store_dword v6, v4, s[12:13]
|
|
; GFX6789-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX6789-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10-LABEL: sample_1d_lwe:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b32 s14, exec_lo
|
|
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10-NEXT: v_mov_b32_e32 v6, 0
|
|
; GFX10-NEXT: v_mov_b32_e32 v5, v0
|
|
; GFX10-NEXT: v_mov_b32_e32 v7, v6
|
|
; GFX10-NEXT: v_mov_b32_e32 v8, v6
|
|
; GFX10-NEXT: v_mov_b32_e32 v9, v6
|
|
; GFX10-NEXT: v_mov_b32_e32 v10, v6
|
|
; GFX10-NEXT: v_mov_b32_e32 v0, v6
|
|
; GFX10-NEXT: v_mov_b32_e32 v1, v7
|
|
; GFX10-NEXT: v_mov_b32_e32 v2, v8
|
|
; GFX10-NEXT: v_mov_b32_e32 v3, v9
|
|
; GFX10-NEXT: v_mov_b32_e32 v4, v10
|
|
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14
|
|
; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: global_store_dword v6, v4, s[12:13]
|
|
; GFX10-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX11-LABEL: sample_1d_lwe:
|
|
; GFX11: ; %bb.0: ; %main_body
|
|
; GFX11-NEXT: s_mov_b32 s14, exec_lo
|
|
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0
|
|
; GFX11-NEXT: v_mov_b32_e32 v7, v6
|
|
; GFX11-NEXT: v_mov_b32_e32 v8, v6
|
|
; GFX11-NEXT: v_mov_b32_e32 v9, v6
|
|
; GFX11-NEXT: v_mov_b32_e32 v10, v6
|
|
; GFX11-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
|
|
; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9
|
|
; GFX11-NEXT: v_mov_b32_e32 v4, v10
|
|
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s14
|
|
; GFX11-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: global_store_b32 v6, v4, s[12:13]
|
|
; GFX11-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX12-LABEL: sample_1d_lwe:
|
|
; GFX12: ; %bb.0: ; %main_body
|
|
; GFX12-NEXT: s_mov_b32 s14, exec_lo
|
|
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX12-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0
|
|
; GFX12-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6
|
|
; GFX12-NEXT: v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6
|
|
; GFX12-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
|
|
; GFX12-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9
|
|
; GFX12-NEXT: v_mov_b32_e32 v4, v10
|
|
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s14
|
|
; GFX12-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
|
|
; GFX12-NEXT: s_wait_samplecnt 0x0
|
|
; GFX12-NEXT: global_store_b32 v6, v4, s[12:13]
|
|
; GFX12-NEXT: ; return to shader part epilog
|
|
main_body:
|
|
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
|
|
%v.vec = extractvalue {<4 x float>, i32} %v, 0
|
|
%v.err = extractvalue {<4 x float>, i32} %v, 1
|
|
store i32 %v.err, ptr addrspace(1) %out, align 4
|
|
ret <4 x float> %v.vec
|
|
}
|
|
|
|
; Samples a 2D image at (%s, %t) through llvm.amdgcn.image.sample.2d and returns
; the <4 x float> result unchanged. Each target is expected to save exec, apply
; s_wqm, AND exec with the saved copy, then emit a single image_sample with
; dmask:0xf. The pre-GFX10 output carries no dim modifier, the GFX10PLUS output
; adds dim:SQ_RSRC_IMG_2D, and the GFX12 output prints the address as the list
; [v0, v1] and waits with s_wait_samplecnt rather than s_waitcnt.
define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; VERDE-LABEL: sample_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Samples a 3D image at (%s, %t, %r) through llvm.amdgcn.image.sample.3d and
; returns the <4 x float> result unchanged. The three coordinates are expected
; in v[0:2] (printed as [v0, v1, v2] in the GFX12 output), and the GFX10PLUS and
; GFX12 outputs carry dim:SQ_RSRC_IMG_3D. All targets wrap the sample in the
; save-exec / s_wqm / s_and sequence.
define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
; VERDE-LABEL: sample_3d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_3d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_3d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_3d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Samples a cube image with operands (%s, %t, %face) through
; llvm.amdgcn.image.sample.cube and returns the <4 x float> result unchanged.
; The pre-GFX10 outputs are expected to carry the da modifier on image_sample,
; while the GFX10PLUS and GFX12 outputs carry dim:SQ_RSRC_IMG_CUBE instead.
; The GFX12 output prints the address as [v0, v1, v2] and waits with
; s_wait_samplecnt.
define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
; VERDE-LABEL: sample_cube:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_cube:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_cube:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_cube:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Samples a 1D array image with operands (%s, %slice) through
; llvm.amdgcn.image.sample.1darray and returns the <4 x float> result unchanged.
; The pre-GFX10 outputs are expected to carry the da modifier; the GFX10PLUS and
; GFX12 outputs carry dim:SQ_RSRC_IMG_1D_ARRAY instead. The two address operands
; appear as v[0:1], or as [v0, v1] in the GFX12 output.
define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
; VERDE-LABEL: sample_1darray:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1darray:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1darray:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_1darray:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Samples a 2D array image with operands (%s, %t, %slice) through
; llvm.amdgcn.image.sample.2darray and returns the <4 x float> result unchanged.
; The pre-GFX10 outputs are expected to carry the da modifier; the GFX10PLUS and
; GFX12 outputs carry dim:SQ_RSRC_IMG_2D_ARRAY instead. The three address
; operands appear as v[0:2], or as [v0, v1, v2] in the GFX12 output.
define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
; VERDE-LABEL: sample_2darray:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_2darray:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_2darray:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_2darray:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Calls llvm.amdgcn.image.sample.c.1d with %zcompare ahead of the coordinate %s
; and returns the <4 x float> result unchanged. Every target is expected to
; select image_sample_c with a two-register address (v[0:1], or [v0, v1] in the
; GFX12 output) and dmask:0xf; the GFX10PLUS and GFX12 outputs add
; dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
; VERDE-LABEL: sample_c_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_c v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Calls llvm.amdgcn.image.sample.c.2d with %zcompare ahead of the coordinates
; (%s, %t) and returns the <4 x float> result unchanged. Every target is
; expected to select image_sample_c with a three-register address (v[0:2], or
; [v0, v1, v2] in the GFX12 output) and dmask:0xf; the GFX10PLUS and GFX12
; outputs add dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; VERDE-LABEL: sample_c_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_c v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Calls llvm.amdgcn.image.sample.cl.1d with the coordinate %s followed by %clamp
; and returns the <4 x float> result unchanged. Every target is expected to
; select image_sample_cl with a two-register address (v[0:1], or [v0, v1] in the
; GFX12 output) and dmask:0xf; the GFX10PLUS and GFX12 outputs add
; dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) {
; VERDE-LABEL: sample_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_cl_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_cl_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_cl v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Calls llvm.amdgcn.image.sample.cl.2d with the coordinates (%s, %t) followed by
; %clamp and returns the <4 x float> result unchanged. Every target is expected
; to select image_sample_cl with a three-register address (v[0:2], or
; [v0, v1, v2] in the GFX12 output) and dmask:0xf; the GFX10PLUS and GFX12
; outputs add dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_cl_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_cl_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_cl v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Calls llvm.amdgcn.image.sample.c.cl.1d with operands (%zcompare, %s, %clamp)
; and returns the <4 x float> result unchanged. Every target is expected to
; select image_sample_c_cl with a three-register address (v[0:2], or
; [v0, v1, v2] in the GFX12 output) and dmask:0xf; the GFX10PLUS and GFX12
; outputs add dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) {
; VERDE-LABEL: sample_c_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_cl_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_cl_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_c_cl v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Calls llvm.amdgcn.image.sample.c.cl.2d with operands
; (%zcompare, %s, %t, %clamp) and returns the <4 x float> result unchanged.
; Every target is expected to select image_sample_c_cl with a four-register
; address (v[0:3], or [v0, v1, v2, v3] in the GFX12 output) and dmask:0xf; the
; GFX10PLUS and GFX12 outputs add dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_c_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_cl_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_cl_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_c_cl v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Calls llvm.amdgcn.image.sample.b.1d with %bias ahead of the coordinate %s and
; returns the <4 x float> result unchanged. Every target is expected to select
; image_sample_b with a two-register address (v[0:1], or [v0, v1] in the GFX12
; output) and dmask:0xf; the GFX10PLUS and GFX12 outputs add
; dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) {
; VERDE-LABEL: sample_b_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_b_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_b_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_b_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_b v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Calls llvm.amdgcn.image.sample.b.2d with %bias ahead of the coordinates
; (%s, %t) and returns the <4 x float> result unchanged. Every target is
; expected to select image_sample_b with a three-register address (v[0:2], or
; [v0, v1, v2] in the GFX12 output) and dmask:0xf; the GFX10PLUS and GFX12
; outputs add dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
; VERDE-LABEL: sample_b_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_b_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_b_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_b_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_b v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Calls llvm.amdgcn.image.sample.c.b.1d with operands (%bias, %zcompare, %s) and
; returns the <4 x float> result unchanged. Every target is expected to select
; image_sample_c_b with a three-register address (v[0:2], or [v0, v1, v2] in
; the GFX12 output) and dmask:0xf; the GFX10PLUS and GFX12 outputs add
; dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) {
; VERDE-LABEL: sample_c_b_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_b_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_b_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_b_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_c_b v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Calls llvm.amdgcn.image.sample.c.b.2d with operands
; (%bias, %zcompare, %s, %t) and returns the <4 x float> result unchanged.
; Every target is expected to select image_sample_c_b with a four-register
; address (v[0:3], or [v0, v1, v2, v3] in the GFX12 output) and dmask:0xf; the
; GFX10PLUS and GFX12 outputs add dim:SQ_RSRC_IMG_2D.
define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
; VERDE-LABEL: sample_c_b_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_b_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_b_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_b_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_c_b v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; Calls llvm.amdgcn.image.sample.b.cl.1d with operands (%bias, %s, %clamp) and
; returns the <4 x float> result unchanged. Every target is expected to select
; image_sample_b_cl with a three-register address (v[0:2], or [v0, v1, v2] in
; the GFX12 output) and dmask:0xf; the GFX10PLUS and GFX12 outputs add
; dim:SQ_RSRC_IMG_1D.
define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) {
; VERDE-LABEL: sample_b_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_b_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_b_cl_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_b_cl_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_b_cl v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.b.cl on a 2D image. Address operands in call order: %bias, %s,
; %t, %clamp. The leading i32 15 is the dmask (printed as dmask:0xf), so all
; four result channels are returned.
define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_b_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_b_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_b_cl_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_b_cl_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_b_cl v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.c.b.cl on a 1D image. Address operands in call order: %bias,
; %zcompare, %s, %clamp. The leading i32 15 is the dmask (printed as
; dmask:0xf), so all four result channels are returned.
define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) {
; VERDE-LABEL: sample_c_b_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_b_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_b_cl_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_b_cl_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_c_b_cl v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.c.b.cl on a 2D image. Address operands in call order: %bias,
; %zcompare, %s, %t, %clamp (five dwords, v[0:4] in the contiguous encodings).
; The leading i32 15 is the dmask (printed as dmask:0xf).
define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_c_b_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_b_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_b_cl_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_b_cl_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample_c_b_cl v[0:3], [v0, v1, v2, v[3:4]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.d on a 1D image. Address operands in call order: %dsdh, %dsdv,
; %s. The expected code for every target is just the sample instruction
; followed by a wait; no exec-mask setup is emitted around it.
define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
; VERDE-LABEL: sample_d_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_d_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_d_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_d_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_d v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.d on a 2D image. Address operands in call order: %dsdh, %dtdh,
; %dsdv, %dtdv, %s, %t (six dwords, v[0:5] in the contiguous encodings).
; No exec-mask setup is expected around the sample.
define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; VERDE-LABEL: sample_d_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_d_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_d_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_d_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_d v[0:3], [v0, v1, v2, v[3:5]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.c.d on a 1D image. Address operands in call order: %zcompare,
; %dsdh, %dsdv, %s. No exec-mask setup is expected around the sample.
define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
; VERDE-LABEL: sample_c_d_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_d_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_c_d v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.c.d on a 2D image. Address operands in call order: %zcompare,
; %dsdh, %dtdh, %dsdv, %dtdv, %s, %t (seven dwords, v[0:6] in the contiguous
; encodings). No exec-mask setup is expected around the sample.
define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; VERDE-LABEL: sample_c_d_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_d_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_c_d v[0:3], [v0, v1, v2, v[3:6]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.d.cl on a 1D image. Address operands in call order: %dsdh,
; %dsdv, %s, %clamp. No exec-mask setup is expected around the sample.
define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
; VERDE-LABEL: sample_d_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_d_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_d_cl_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_d_cl_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_d_cl v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.d.cl on a 2D image. Address operands in call order: %dsdh,
; %dtdh, %dsdv, %dtdv, %s, %t, %clamp (seven dwords, v[0:6] in the contiguous
; encodings). No exec-mask setup is expected around the sample.
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_d_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_d_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_d_cl_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_d_cl_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_d_cl v[0:3], [v0, v1, v2, v[3:6]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.c.d.cl on a 1D image. Address operands in call order:
; %zcompare, %dsdh, %dsdv, %s, %clamp (five dwords, v[0:4] in the contiguous
; encodings). No exec-mask setup is expected around the sample.
define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
; VERDE-LABEL: sample_c_d_cl_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_cl_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_d_cl_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_cl_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_c_d_cl v[0:3], [v0, v1, v2, v[3:4]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.c.d.cl on a 2D image. Address operands in call order:
; %zcompare, %dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp (eight dwords, v[0:7]
; in the contiguous encodings). No exec-mask setup is expected around the
; sample.
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_c_d_cl_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_d_cl_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_cl_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_c_d_cl v[0:3], [v0, v1, v2, v[3:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.l on a 1D image. Address operands in call order: %s, %lod.
; No exec-mask setup is expected around the sample.
define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
; VERDE-LABEL: sample_l_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_l_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_l_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_l_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_l v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.l on a 2D image. Address operands in call order: %s, %t, %lod.
; No exec-mask setup is expected around the sample.
define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
; VERDE-LABEL: sample_l_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_l_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_l_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_l_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_l v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.c.l on a 1D image. Address operands in call order: %zcompare,
; %s, %lod. No exec-mask setup is expected around the sample.
define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
; VERDE-LABEL: sample_c_l_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_l_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_l_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_l_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_c_l v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.c.l on a 2D image. Address operands in call order: %zcompare,
; %s, %t, %lod. No exec-mask setup is expected around the sample.
define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
; VERDE-LABEL: sample_c_l_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_l_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_l_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_l_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_c_l v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.lz on a 1D image. The only address operand is %s, so a single
; VGPR (v0) is expected as the address on every target.
define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_lz_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_lz_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_lz_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_lz_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.lz on a 2D image. Address operands in call order: %s, %t.
; No exec-mask setup is expected around the sample.
define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; VERDE-LABEL: sample_lz_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_lz_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_lz_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_lz_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_lz v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.c.lz on a 1D image. Address operands in call order: %zcompare,
; %s. No exec-mask setup is expected around the sample.
define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
; VERDE-LABEL: sample_c_lz_1d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_lz_1d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_lz_1d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_lz_1d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_c_lz v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.c.lz on a 2D image. Address operands in call order: %zcompare,
; %s, %t. No exec-mask setup is expected around the sample.
define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; VERDE-LABEL: sample_c_lz_2d:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_lz_2d:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_lz_2d:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_lz_2d:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_c_lz v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
|
|
|
|
; image.sample.c.d.o on a 2D array image, returning a single float. The dmask
; is i32 4 (printed as dmask:0x4), so one channel lands in v0. Address
; operands in call order: %offset, %zcompare, %dsdh, %dtdh, %dsdv, %dtdv, %s,
; %t, %slice (nine dwords, v[0:8] in the contiguous encodings).
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
; VERDE-LABEL: sample_c_d_o_2darray_V1:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 da
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_o_2darray_V1:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_d_o_2darray_V1:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_o_2darray_V1:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_c_d_o v0, [v0, v1, v2, v[3:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret float %v
}
|
|
|
|
; Calls the sample.c.d.o.2darray intrinsic with dmask 4 and the second-to-last
; immediate set to 1, so the call returns a {float, i32} pair. The expected
; output shows this as the "tfe" modifier and a two-register destination that
; is zero-initialised before the sample. The i32 member is stored to %out and
; the float member is the shader's return value.
define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, ptr addrspace(1) inreg %out) {
; VERDE-LABEL: sample_c_d_o_2darray_V1_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v9, 0
; VERDE-NEXT: v_mov_b32_e32 v10, v9
; VERDE-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 tfe da
; VERDE-NEXT: s_mov_b32 s15, 0xf000
; VERDE-NEXT: s_mov_b32 s14, -1
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: v_mov_b32_e32 v0, v9
; VERDE-NEXT: buffer_store_dword v10, off, s[12:15], 0
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_o_2darray_V1_tfe:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: v_mov_b32_e32 v11, 0
; GFX6789-NEXT: v_mov_b32_e32 v12, v11
; GFX6789-NEXT: v_mov_b32_e32 v9, v11
; GFX6789-NEXT: v_mov_b32_e32 v10, v12
; GFX6789-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 tfe da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: v_mov_b32_e32 v0, v9
; GFX6789-NEXT: global_store_dword v11, v10, s[12:13]
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v11, 0
; GFX10-NEXT: v_mov_b32_e32 v12, v11
; GFX10-NEXT: v_mov_b32_e32 v9, v11
; GFX10-NEXT: v_mov_b32_e32 v10, v12
; GFX10-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v9
; GFX10-NEXT: global_store_dword v11, v10, s[12:13]
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_o_2darray_V1_tfe:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_mov_b32_e32 v11, 0
; GFX11-NEXT: v_dual_mov_b32 v9, v1 :: v_dual_mov_b32 v10, v0
; GFX11-NEXT: v_mov_b32_e32 v12, v11
; GFX11-NEXT: v_dual_mov_b32 v0, v11 :: v_dual_mov_b32 v1, v12
; GFX11-NEXT: image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v[4:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v11, v1, s[12:13]
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_o_2darray_V1_tfe:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: v_mov_b32_e32 v11, 0
; GFX12-NEXT: v_dual_mov_b32 v9, v1 :: v_dual_mov_b32 v10, v0
; GFX12-NEXT: v_mov_b32_e32 v12, v11
; GFX12-NEXT: v_dual_mov_b32 v0, v11 :: v_dual_mov_b32 v1, v12
; GFX12-NEXT: image_sample_c_d_o v[0:1], [v10, v9, v2, v[3:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: global_store_b32 v11, v1, s[12:13]
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %v.vec = extractvalue {float, i32} %v, 0
  %v.err = extractvalue {float, i32} %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret float %v.vec
}
; Calls the sample.c.d.o.2darray intrinsic with dmask 6 and a <2 x float>
; result. The expected output is a single image_sample_c_d_o writing a
; two-register destination with dmask 0x6, followed by a wait.
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
; VERDE-LABEL: sample_c_d_o_2darray_V2:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 da
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_o_2darray_V2:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_c_d_o_2darray_V2:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_o_2darray_V2:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_c_d_o v[0:1], [v0, v1, v2, v[3:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <2 x float> %v
}
; Calls the sample.c.d.o.2darray intrinsic with dmask 6 and the second-to-last
; immediate set to 1, so the call returns {<2 x float>, i32}. The two floats
; and the i32 (bitcast to float) are packed into elements 0..2 of the returned
; <4 x float>; element 3 is left undef. The expected output uses a
; three-register, zero-initialised destination and the "tfe" modifier.
define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
; VERDE-LABEL: sample_c_d_o_2darray_V2_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v9, 0
; VERDE-NEXT: v_mov_b32_e32 v10, v9
; VERDE-NEXT: v_mov_b32_e32 v11, v9
; VERDE-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 tfe da
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: v_mov_b32_e32 v0, v9
; VERDE-NEXT: v_mov_b32_e32 v1, v10
; VERDE-NEXT: v_mov_b32_e32 v2, v11
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_o_2darray_V2_tfe:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: v_mov_b32_e32 v9, 0
; GFX6789-NEXT: v_mov_b32_e32 v10, v9
; GFX6789-NEXT: v_mov_b32_e32 v11, v9
; GFX6789-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 tfe da
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: v_mov_b32_e32 v0, v9
; GFX6789-NEXT: v_mov_b32_e32 v1, v10
; GFX6789-NEXT: v_mov_b32_e32 v2, v11
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_o_2darray_V2_tfe:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v9, 0
; GFX10-NEXT: v_mov_b32_e32 v10, v9
; GFX10-NEXT: v_mov_b32_e32 v11, v9
; GFX10-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v9
; GFX10-NEXT: v_mov_b32_e32 v1, v10
; GFX10-NEXT: v_mov_b32_e32 v2, v11
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_o_2darray_V2_tfe:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_mov_b32 v0, 0
; GFX11-NEXT: v_dual_mov_b32 v9, v2 :: v_dual_mov_b32 v10, v1
; GFX11-NEXT: v_mov_b32_e32 v1, v0
; GFX11-NEXT: v_mov_b32_e32 v2, v0
; GFX11-NEXT: image_sample_c_d_o v[0:2], [v11, v10, v9, v3, v[4:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_o_2darray_V2_tfe:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_mov_b32 v0, 0
; GFX12-NEXT: v_dual_mov_b32 v9, v2 :: v_dual_mov_b32 v10, v1
; GFX12-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, v0
; GFX12-NEXT: image_sample_c_d_o v[0:2], [v11, v10, v9, v[3:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %v.vec = extractvalue {<2 x float>, i32} %v, 0
  %v.f1 = extractelement <2 x float> %v.vec, i32 0
  %v.f2 = extractelement <2 x float> %v.vec, i32 1
  %v.err = extractvalue {<2 x float>, i32} %v, 1
  %v.errf = bitcast i32 %v.err to float
  %res.0 = insertelement <4 x float> undef, float %v.f1, i32 0
  %res.1 = insertelement <4 x float> %res.0, float %v.f2, i32 1
  %res.2 = insertelement <4 x float> %res.1, float %v.errf, i32 2
  ret <4 x float> %res.2
}
; 1D sample with the i1 immediate of the intrinsic set to 1. The expected
; output carries the "unorm" modifier on the image_sample instruction for
; every target.
define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_unorm:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_unorm:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_unorm:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_1d_unorm:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0)
  ret <4 x float> %v
}
; 1D sample with the last immediate of the intrinsic set to 1. The expected
; output carries the "glc" modifier on the pre-GFX12 targets and the
; TH_LOAD_NT temporal hint on GFX12.
define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_glc:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_glc:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_glc:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_1d_glc:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_LOAD_NT
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1)
  ret <4 x float> %v
}
; 1D sample with the last immediate of the intrinsic set to 2. The expected
; output carries the "slc" modifier on the pre-GFX12 targets and the
; TH_LOAD_HT temporal hint on GFX12.
define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_slc:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_slc:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_slc:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D slc
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_1d_slc:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 2)
  ret <4 x float> %v
}
; 1D sample with the last immediate of the intrinsic set to 3. The expected
; output carries both "glc" and "slc" on the pre-GFX12 targets and the
; TH_LOAD_LU temporal hint on GFX12.
define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: sample_1d_glc_slc:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_1d_glc_slc:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: sample_1d_glc_slc:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc slc
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_1d_glc_slc:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_LOAD_LU
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 3)
  ret <4 x float> %v
}
; The intrinsic is called with dmask 15 but only element 0 of the result is
; used. The expected output shows the dmask narrowed to 0x1 and a
; single-register destination.
define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_0:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_0:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_0:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: adjust_writemask_sample_0:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %elt0 = extractelement <4 x float> %r, i32 0
  ret float %elt0
}
; The intrinsic is called with dmask 15 but only elements 0 and 1 of the
; result are used. The expected output shows the dmask narrowed to 0x3 and a
; two-register destination.
define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_01:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_01:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_01:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: adjust_writemask_sample_01:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %out
}
; The intrinsic is called with dmask 15 but only elements 0, 1 and 2 of the
; result are used. The expected output shows the dmask narrowed to 0x7 and a
; three-register destination.
define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_012:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_012:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_012:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: adjust_writemask_sample_012:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %out
}
; The intrinsic is called with dmask 15 but only elements 1 and 2 of the
; result are used. The expected output shows the dmask narrowed to 0x6 and a
; two-register destination.
define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_12:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_12:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_12:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: adjust_writemask_sample_12:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %out
}
; The intrinsic is called with dmask 15 but only elements 0 and 3 of the
; result are used. The expected output shows the dmask narrowed to 0x9 and a
; two-register destination.
define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_03:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_03:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_03:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: adjust_writemask_sample_03:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 3>
  ret <2 x float> %out
}
; The intrinsic is called with dmask 15 but only elements 1 and 3 of the
; result are used. The expected output shows the dmask narrowed to 0xa and a
; two-register destination.
define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_13:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_13:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_13:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: adjust_writemask_sample_13:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3>
  ret <2 x float> %out
}
; The intrinsic is called with dmask 15 but only elements 1, 2 and 3 of the
; result are used. The expected output shows the dmask narrowed to 0xe and a
; three-register destination.
define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_123:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_123:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_123:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: adjust_writemask_sample_123:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
  ret <3 x float> %out
}
; The intrinsic is called with dmask 0. The expected output for every target
; contains no image instruction at all, only the return.
define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_none_enabled:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_none_enabled:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_none_enabled:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: adjust_writemask_sample_none_enabled:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %r
}
; The intrinsic is called with dmask 14 and only elements 0 and 1 of the
; result are used. The expected output shows the dmask narrowed to 0x6 and a
; two-register destination.
define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_123_to_12:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_123_to_12:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_123_to_12:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: adjust_writemask_sample_123_to_12:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %out
}
; The intrinsic is called with dmask 11 and only elements 1 and 2 of the
; result are used. The expected output shows the dmask narrowed to 0xa and a
; two-register destination.
define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; VERDE-LABEL: adjust_writemask_sample_013_to_13:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
; VERDE-NEXT: s_wqm_b64 exec, exec
; VERDE-NEXT: s_and_b64 exec, exec, s[12:13]
; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: adjust_writemask_sample_013_to_13:
; GFX6789: ; %bb.0: ; %main_body
; GFX6789-NEXT: s_mov_b64 s[12:13], exec
; GFX6789-NEXT: s_wqm_b64 exec, exec
; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: adjust_writemask_sample_013_to_13:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: adjust_writemask_sample_013_to_13:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %out
}
|
|
|
|
; ---------------------------------------------------------------------------
; Intrinsic declarations. Every declaration below carries attribute group #1.
; ---------------------------------------------------------------------------

; image.sample: 1d / 2d / 3d / cube / 1darray / 2darray. The v4f32i32 form
; returns a { <4 x float>, i32 } pair instead of a bare vector.
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; image.sample.c / .cl / .c.cl (1d and 2d).
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; image.sample.b / .c.b / .b.cl / .c.b.cl (1d and 2d).
declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; image.sample.d / .c.d / .d.cl / .c.d.cl (1d and 2d).
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; image.sample.l / .c.l (1d and 2d).
declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; image.sample.lz / .c.lz (1d and 2d).
declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; image.sample.c.d.o.2darray with scalar and two-element results, each also in
; a form that returns the value paired with an i32.
declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare { float, i32 } @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare { <2 x float>, i32 } @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Attribute groups. #1 is the group attached to the declarations above.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }
|