Files
clang-p2996/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.noret.ll
Carl Ritson 62aa596ba1 [AMDGPU] Add no return image_sample intrinsics and instructions (#97542)
An appropriately configured image resource descriptor can trigger
image_sample instructions to store outputs directly to a linked memory
location instead of returning to VGPRs.

This is opaque to the backend as instruction encoding is unchanged;
however, a mechanism is require to allow frontends to communicate that
these instructions do not require destination VGPRs and store to memory.
Flagging these as stores means they will not be optimized away.
2024-07-20 17:26:58 +09:00

480 lines
25 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX11,GFX11-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX11,GFX11-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
define amdgpu_ps void @sample_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-LABEL: sample_1d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_1d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; GFX10PLUS-LABEL: sample_2d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_2d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.2d.nortn.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_3d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
; GFX10PLUS-LABEL: sample_3d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_3d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.3d.nortn.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_cube_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
; GFX10PLUS-LABEL: sample_cube_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_cube_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.cube.nortn.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_1darray_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
; GFX10PLUS-LABEL: sample_1darray_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_1darray_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.1darray.nortn.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_2darray_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
; GFX10PLUS-LABEL: sample_2darray_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_2darray_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.2darray.nortn.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_b_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
; GFX10PLUS-LABEL: sample_b_1d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample_b off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_b_1d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample_b off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.b.1d.nortn.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_b_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; GFX10PLUS-LABEL: sample_b_2d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample_b off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_b_2d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample_b off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.b.2d.nortn.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_c_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
; GFX10PLUS-LABEL: sample_c_1d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample_c off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_c_1d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample_c off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.c.1d.nortn.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_c_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; GFX10PLUS-LABEL: sample_c_2d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: image_sample_c off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_c_2d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: image_sample_c off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.c.2d.nortn.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_d_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
; GFX10PLUS-LABEL: sample_d_1d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_d off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_d_1d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_d off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.d.1d.nortn.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_d_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; GFX10PLUS-LABEL: sample_d_2d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_d off, v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_d_2d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_d off, [v0, v1, v2, v[3:5]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.d.2d.nortn.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_l_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
; GFX10PLUS-LABEL: sample_l_1d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_l off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_l_1d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_l off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.l.1d.nortn.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps void @sample_l_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
; GFX10PLUS-LABEL: sample_l_2d_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_l off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_l_2d_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_l off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.l.2d.nortn.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
define amdgpu_ps <4 x float> @sample_nortn_mix_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-LABEL: sample_nortn_mix_1:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_nortn_mix_1:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_nortn_mix_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-LABEL: sample_nortn_mix_2:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-NEXT: v_mov_b32_e32 v4, v0
; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: sample_nortn_mix_2:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_mov_b32 s12, exec_lo
; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT: v_mov_b32_e32 v4, v0
; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %v
}
define amdgpu_ps <4 x float> @sample_nortn_mix_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_3:
; GFX10PLUS-SDAG: ; %bb.0: ; %main_body
; GFX10PLUS-SDAG-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-SDAG-NEXT: image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX10PLUS-SDAG-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-SDAG-NEXT: ; return to shader part epilog
;
; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_3:
; GFX10PLUS-GISEL: ; %bb.0: ; %main_body
; GFX10PLUS-GISEL-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-GISEL-NEXT: image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX10PLUS-GISEL-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-GISEL-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-LABEL: sample_nortn_mix_3:
; GFX12-SDAG: ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT: s_mov_b32 s12, exec_lo
; GFX12-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-SDAG-NEXT: image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x1
; GFX12-SDAG-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: sample_nortn_mix_3:
; GFX12-GISEL: ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT: s_mov_b32 s12, exec_lo
; GFX12-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-GISEL-NEXT: image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x1
; GFX12-GISEL-NEXT: image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%v.0 = extractelement <4 x float> %v, i32 0
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%u = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %v.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %u
}
define amdgpu_ps <4 x float> @sample_nortn_mix_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_4:
; GFX10PLUS-SDAG: ; %bb.0: ; %main_body
; GFX10PLUS-SDAG-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-SDAG-NEXT: image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(2)
; GFX10PLUS-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-SDAG-NEXT: ; return to shader part epilog
;
; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_4:
; GFX10PLUS-GISEL: ; %bb.0: ; %main_body
; GFX10PLUS-GISEL-NEXT: s_mov_b32 s12, exec_lo
; GFX10PLUS-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10PLUS-GISEL-NEXT: image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(2)
; GFX10PLUS-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-GISEL-NEXT: ; return to shader part epilog
;
; GFX12-SDAG-LABEL: sample_nortn_mix_4:
; GFX12-SDAG: ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT: s_mov_b32 s12, exec_lo
; GFX12-SDAG-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-SDAG-NEXT: image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x1
; GFX12-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x2
; GFX12-SDAG-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: sample_nortn_mix_4:
; GFX12-GISEL: ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT: s_mov_b32 s12, exec_lo
; GFX12-GISEL-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX12-GISEL-NEXT: image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x1
; GFX12-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX12-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x2
; GFX12-GISEL-NEXT: image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%v.0 = extractelement <4 x float> %v, i32 0
%v.1 = extractelement <4 x float> %v, i32 0
%v.2 = extractelement <4 x float> %v, i32 0
%v.3 = extractelement <4 x float> %v, i32 0
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%u = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %v.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%u.0 = extractelement <4 x float> %u, i32 0
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.3, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %u.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret <4 x float> %u
}
define amdgpu_ps void @sample_d_1d_g16_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10PLUS-LABEL: sample_d_1d_g16_nortn:
; GFX10PLUS: ; %bb.0: ; %main_body
; GFX10PLUS-NEXT: image_sample_d_g16 off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: sample_d_1d_g16_nortn:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: image_sample_d_g16 off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
ret void
}
declare void @llvm.amdgcn.image.sample.1d.nortn.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.2d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.3d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.cube.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.1darray.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.2darray.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.b.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.b.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.c.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.c.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.d.1d.f32.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.d.2d.f32.nortn.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.l.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare void @llvm.amdgcn.image.sample.l.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX10-GISEL: {{.*}}
; GFX10-SDAG: {{.*}}
; GFX11: {{.*}}
; GFX11-GISEL: {{.*}}
; GFX11-SDAG: {{.*}}