Files
clang-p2996/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
Jay Foad eb7491769a [AMDGPU] Reimplement the GFX11 early release VGPRs optimization
Implement this optimization in SIInsertWaitcnts, where we already have
information about whether there might be outstanding VMEM store
instructions. This has the following advantages:
- Correctly handles atomics-with-return.
- Correctly handles call instructions.
- Should be faster because it does not require running a separate pass.

Differential Revision: https://reviews.llvm.org/D153279
2023-06-19 17:12:54 +01:00

1283 lines
48 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX678,GFX67,GFX6
; RUN: llc < %s -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX678,GFX789,GFX67,GFX78,GFX7
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX678,GFX789,GFX8910,GFX78,GFX89,GFX8
; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX789,GFX8910,GFX89,GFX910,GFX9
; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX78910,GFX8910,GFX910,GFX10
; RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX11
; Scalar buffer load of one i32 at the constant byte offset 4; the loaded value
; is bitcast to float and passed to llvm.amdgcn.exp.f32.
; The checks expect the immediate to print as 0x1 for the GFX6/GFX7 runs and as
; 0x4 for GFX8 and newer.
define amdgpu_ps void @s_buffer_load_imm(<4 x i32> inreg %desc) {
; GFX67-LABEL: s_buffer_load_imm:
; GFX67: ; %bb.0: ; %main_body
; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0x1
; GFX67-NEXT: s_waitcnt lgkmcnt(0)
; GFX67-NEXT: v_mov_b32_e32 v0, s0
; GFX67-NEXT: exp mrt0 v0, v0, v0, v0 done vm
; GFX67-NEXT: s_endpgm
;
; GFX8910-LABEL: s_buffer_load_imm:
; GFX8910: ; %bb.0: ; %main_body
; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x4
; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
; GFX8910-NEXT: v_mov_b32_e32 v0, s0
; GFX8910-NEXT: exp mrt0 v0, v0, v0, v0 done vm
; GFX8910-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_load_imm:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x4
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done
; GFX11-NEXT: s_endpgm
main_body:
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
%bitcast = bitcast i32 %load to float
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
ret void
}
; Scalar buffer load of one i32 whose offset is the `inreg` argument %index.
; The checks expect the offset operand to be the SGPR s4 on every target; the
; gfx900, gfx1010 and gfx1100 runs additionally print "offset:0x0".
define amdgpu_ps void @s_buffer_load_index(<4 x i32> inreg %desc, i32 inreg %index) {
; GFX678-LABEL: s_buffer_load_index:
; GFX678: ; %bb.0: ; %main_body
; GFX678-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX678-NEXT: s_waitcnt lgkmcnt(0)
; GFX678-NEXT: v_mov_b32_e32 v0, s0
; GFX678-NEXT: exp mrt0 v0, v0, v0, v0 done vm
; GFX678-NEXT: s_endpgm
;
; GFX910-LABEL: s_buffer_load_index:
; GFX910: ; %bb.0: ; %main_body
; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
; GFX910-NEXT: s_waitcnt lgkmcnt(0)
; GFX910-NEXT: v_mov_b32_e32 v0, s0
; GFX910-NEXT: exp mrt0 v0, v0, v0, v0 done vm
; GFX910-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_load_index:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done
; GFX11-NEXT: s_endpgm
main_body:
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
%bitcast = bitcast i32 %load to float
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
ret void
}
; Same IR body as s_buffer_load_index, but %index is not marked `inreg`.
; The checks expect a buffer_load with `offen` using v0 as the offset (waited on
; with vmcnt) instead of an s_buffer_load.
define amdgpu_ps void @s_buffer_load_index_divergent(<4 x i32> inreg %desc, i32 %index) {
; GFX678910-LABEL: s_buffer_load_index_divergent:
; GFX678910: ; %bb.0: ; %main_body
; GFX678910-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
; GFX678910-NEXT: s_waitcnt vmcnt(0)
; GFX678910-NEXT: exp mrt0 v0, v0, v0, v0 done vm
; GFX678910-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_load_index_divergent:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done
; GFX11-NEXT: s_endpgm
main_body:
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
%bitcast = bitcast i32 %load to float
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
ret void
}
; Scalar buffer load of <2 x i32> at the constant byte offset 64; both elements
; are exported as floats.
; The checks expect the immediate to print as 0x10 for the GFX6/GFX7 runs and as
; 0x40 for GFX8 and newer.
define amdgpu_ps void @s_buffer_loadx2_imm(<4 x i32> inreg %desc) {
; GFX67-LABEL: s_buffer_loadx2_imm:
; GFX67: ; %bb.0: ; %main_body
; GFX67-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x10
; GFX67-NEXT: s_waitcnt lgkmcnt(0)
; GFX67-NEXT: v_mov_b32_e32 v0, s0
; GFX67-NEXT: v_mov_b32_e32 v1, s1
; GFX67-NEXT: exp mrt0 v0, v1, v0, v0 done vm
; GFX67-NEXT: s_endpgm
;
; GFX8910-LABEL: s_buffer_loadx2_imm:
; GFX8910: ; %bb.0: ; %main_body
; GFX8910-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x40
; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
; GFX8910-NEXT: v_mov_b32_e32 v0, s0
; GFX8910-NEXT: v_mov_b32_e32 v1, s1
; GFX8910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
; GFX8910-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_loadx2_imm:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x40
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: v_mov_b32_e32 v1, s1
; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
; GFX11-NEXT: s_endpgm
main_body:
%load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 64, i32 0)
%bitcast = bitcast <2 x i32> %load to <2 x float>
%x = extractelement <2 x float> %bitcast, i32 0
%y = extractelement <2 x float> %bitcast, i32 1
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
ret void
}
; Scalar buffer load of <2 x i32> whose offset is the `inreg` argument %index.
; The checks expect the SGPR s4 as the offset operand; the gfx900, gfx1010 and
; gfx1100 runs additionally print "offset:0x0".
define amdgpu_ps void @s_buffer_loadx2_index(<4 x i32> inreg %desc, i32 inreg %index) {
; GFX678-LABEL: s_buffer_loadx2_index:
; GFX678: ; %bb.0: ; %main_body
; GFX678-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], s4
; GFX678-NEXT: s_waitcnt lgkmcnt(0)
; GFX678-NEXT: v_mov_b32_e32 v0, s0
; GFX678-NEXT: v_mov_b32_e32 v1, s1
; GFX678-NEXT: exp mrt0 v0, v1, v0, v0 done vm
; GFX678-NEXT: s_endpgm
;
; GFX910-LABEL: s_buffer_loadx2_index:
; GFX910: ; %bb.0: ; %main_body
; GFX910-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], s4 offset:0x0
; GFX910-NEXT: s_waitcnt lgkmcnt(0)
; GFX910-NEXT: v_mov_b32_e32 v0, s0
; GFX910-NEXT: v_mov_b32_e32 v1, s1
; GFX910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
; GFX910-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_loadx2_index:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_buffer_load_b64 s[0:1], s[0:3], s4 offset:0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: v_mov_b32_e32 v1, s1
; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
; GFX11-NEXT: s_endpgm
main_body:
%load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
%bitcast = bitcast <2 x i32> %load to <2 x float>
%x = extractelement <2 x float> %bitcast, i32 0
%y = extractelement <2 x float> %bitcast, i32 1
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
ret void
}
; <2 x i32> variant with a non-`inreg` %index.
; The checks expect a two-dword buffer_load with `offen` using v0 as the offset
; instead of an s_buffer_load.
define amdgpu_ps void @s_buffer_loadx2_index_divergent(<4 x i32> inreg %desc, i32 %index) {
; GFX678910-LABEL: s_buffer_loadx2_index_divergent:
; GFX678910: ; %bb.0: ; %main_body
; GFX678910-NEXT: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
; GFX678910-NEXT: s_waitcnt vmcnt(0)
; GFX678910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
; GFX678910-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_loadx2_index_divergent:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
; GFX11-NEXT: s_endpgm
main_body:
%load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
%bitcast = bitcast <2 x i32> %load to <2 x float>
%x = extractelement <2 x float> %bitcast, i32 0
%y = extractelement <2 x float> %bitcast, i32 1
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
ret void
}
; Scalar buffer load of <3 x i32> at the constant byte offset 64; the three
; elements are exported as floats.
; The checks expect a four-dword scalar load (dwordx4 / b128) on every target,
; with only s0..s2 copied to VGPRs, and the immediate printed as 0x10 for the
; GFX6/GFX7 runs and 0x40 for GFX8 and newer.
define amdgpu_ps void @s_buffer_loadx3_imm(<4 x i32> inreg %desc) {
; GFX67-LABEL: s_buffer_loadx3_imm:
; GFX67: ; %bb.0: ; %main_body
; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x10
; GFX67-NEXT: s_waitcnt lgkmcnt(0)
; GFX67-NEXT: v_mov_b32_e32 v0, s0
; GFX67-NEXT: v_mov_b32_e32 v1, s1
; GFX67-NEXT: v_mov_b32_e32 v2, s2
; GFX67-NEXT: exp mrt0 v0, v1, v2, v0 done vm
; GFX67-NEXT: s_endpgm
;
; GFX8910-LABEL: s_buffer_loadx3_imm:
; GFX8910: ; %bb.0: ; %main_body
; GFX8910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x40
; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
; GFX8910-NEXT: v_mov_b32_e32 v0, s0
; GFX8910-NEXT: v_mov_b32_e32 v1, s1
; GFX8910-NEXT: v_mov_b32_e32 v2, s2
; GFX8910-NEXT: exp mrt0 v0, v1, v2, v0 done vm
; GFX8910-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_loadx3_imm:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0x40
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: v_mov_b32_e32 v1, s1
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: exp mrt0 v0, v1, v2, v0 done
; GFX11-NEXT: s_endpgm
main_body:
%load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 64, i32 0)
%bitcast = bitcast <3 x i32> %load to <3 x float>
%x = extractelement <3 x float> %bitcast, i32 0
%y = extractelement <3 x float> %bitcast, i32 1
%z = extractelement <3 x float> %bitcast, i32 2
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
ret void
}
; Scalar buffer load of <3 x i32> whose offset is the `inreg` argument %index.
; The checks expect a four-dword scalar load (dwordx4 / b128) with the SGPR s4
; as the offset operand; the gfx900, gfx1010 and gfx1100 runs additionally print
; "offset:0x0".
define amdgpu_ps void @s_buffer_loadx3_index(<4 x i32> inreg %desc, i32 inreg %index) {
; GFX678-LABEL: s_buffer_loadx3_index:
; GFX678: ; %bb.0: ; %main_body
; GFX678-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4
; GFX678-NEXT: s_waitcnt lgkmcnt(0)
; GFX678-NEXT: v_mov_b32_e32 v0, s0
; GFX678-NEXT: v_mov_b32_e32 v1, s1
; GFX678-NEXT: v_mov_b32_e32 v2, s2
; GFX678-NEXT: exp mrt0 v0, v1, v2, v0 done vm
; GFX678-NEXT: s_endpgm
;
; GFX910-LABEL: s_buffer_loadx3_index:
; GFX910: ; %bb.0: ; %main_body
; GFX910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4 offset:0x0
; GFX910-NEXT: s_waitcnt lgkmcnt(0)
; GFX910-NEXT: v_mov_b32_e32 v0, s0
; GFX910-NEXT: v_mov_b32_e32 v1, s1
; GFX910-NEXT: v_mov_b32_e32 v2, s2
; GFX910-NEXT: exp mrt0 v0, v1, v2, v0 done vm
; GFX910-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_loadx3_index:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], s4 offset:0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: v_mov_b32_e32 v1, s1
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: exp mrt0 v0, v1, v2, v0 done
; GFX11-NEXT: s_endpgm
main_body:
%load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
%bitcast = bitcast <3 x i32> %load to <3 x float>
%x = extractelement <3 x float> %bitcast, i32 0
%y = extractelement <3 x float> %bitcast, i32 1
%z = extractelement <3 x float> %bitcast, i32 2
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
ret void
}
; <3 x i32> variant with a non-`inreg` %index.
; The expected buffer_load width differs per target: the tahiti (GFX6) run
; expects buffer_load_dwordx4, the GFX7 through GFX10 runs expect
; buffer_load_dwordx3, and the gfx1100 run expects buffer_load_b96.
define amdgpu_ps void @s_buffer_loadx3_index_divergent(<4 x i32> inreg %desc, i32 %index) {
; GFX6-LABEL: s_buffer_loadx3_index_divergent:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: exp mrt0 v0, v1, v2, v0 done vm
; GFX6-NEXT: s_endpgm
;
; GFX78910-LABEL: s_buffer_loadx3_index_divergent:
; GFX78910: ; %bb.0: ; %main_body
; GFX78910-NEXT: buffer_load_dwordx3 v[0:2], v0, s[0:3], 0 offen
; GFX78910-NEXT: s_waitcnt vmcnt(0)
; GFX78910-NEXT: exp mrt0 v0, v1, v2, v0 done vm
; GFX78910-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_loadx3_index_divergent:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: exp mrt0 v0, v1, v2, v0 done
; GFX11-NEXT: s_endpgm
main_body:
%load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
%bitcast = bitcast <3 x i32> %load to <3 x float>
%x = extractelement <3 x float> %bitcast, i32 0
%y = extractelement <3 x float> %bitcast, i32 1
%z = extractelement <3 x float> %bitcast, i32 2
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
ret void
}
; Scalar buffer load of <4 x i32> at the constant byte offset 200; all four
; elements are exported as floats.
; The checks expect the immediate to print as 0x32 for the GFX6/GFX7 runs and as
; 0xc8 for GFX8 and newer.
define amdgpu_ps void @s_buffer_loadx4_imm(<4 x i32> inreg %desc) {
; GFX67-LABEL: s_buffer_loadx4_imm:
; GFX67: ; %bb.0: ; %main_body
; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x32
; GFX67-NEXT: s_waitcnt lgkmcnt(0)
; GFX67-NEXT: v_mov_b32_e32 v0, s0
; GFX67-NEXT: v_mov_b32_e32 v1, s1
; GFX67-NEXT: v_mov_b32_e32 v2, s2
; GFX67-NEXT: v_mov_b32_e32 v3, s3
; GFX67-NEXT: exp mrt0 v0, v1, v2, v3 done vm
; GFX67-NEXT: s_endpgm
;
; GFX8910-LABEL: s_buffer_loadx4_imm:
; GFX8910: ; %bb.0: ; %main_body
; GFX8910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0xc8
; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
; GFX8910-NEXT: v_mov_b32_e32 v0, s0
; GFX8910-NEXT: v_mov_b32_e32 v1, s1
; GFX8910-NEXT: v_mov_b32_e32 v2, s2
; GFX8910-NEXT: v_mov_b32_e32 v3, s3
; GFX8910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
; GFX8910-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_loadx4_imm:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0xc8
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: v_mov_b32_e32 v1, s1
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: v_mov_b32_e32 v3, s3
; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
; GFX11-NEXT: s_endpgm
main_body:
%load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 200, i32 0)
%bitcast = bitcast <4 x i32> %load to <4 x float>
%x = extractelement <4 x float> %bitcast, i32 0
%y = extractelement <4 x float> %bitcast, i32 1
%z = extractelement <4 x float> %bitcast, i32 2
%w = extractelement <4 x float> %bitcast, i32 3
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
ret void
}
; Scalar buffer load of <4 x i32> whose offset is the `inreg` argument %index.
; The checks expect the SGPR s4 as the offset operand; the gfx900, gfx1010 and
; gfx1100 runs additionally print "offset:0x0".
define amdgpu_ps void @s_buffer_loadx4_index(<4 x i32> inreg %desc, i32 inreg %index) {
; GFX678-LABEL: s_buffer_loadx4_index:
; GFX678: ; %bb.0: ; %main_body
; GFX678-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4
; GFX678-NEXT: s_waitcnt lgkmcnt(0)
; GFX678-NEXT: v_mov_b32_e32 v0, s0
; GFX678-NEXT: v_mov_b32_e32 v1, s1
; GFX678-NEXT: v_mov_b32_e32 v2, s2
; GFX678-NEXT: v_mov_b32_e32 v3, s3
; GFX678-NEXT: exp mrt0 v0, v1, v2, v3 done vm
; GFX678-NEXT: s_endpgm
;
; GFX910-LABEL: s_buffer_loadx4_index:
; GFX910: ; %bb.0: ; %main_body
; GFX910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], s4 offset:0x0
; GFX910-NEXT: s_waitcnt lgkmcnt(0)
; GFX910-NEXT: v_mov_b32_e32 v0, s0
; GFX910-NEXT: v_mov_b32_e32 v1, s1
; GFX910-NEXT: v_mov_b32_e32 v2, s2
; GFX910-NEXT: v_mov_b32_e32 v3, s3
; GFX910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
; GFX910-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_loadx4_index:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], s4 offset:0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: v_mov_b32_e32 v1, s1
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: v_mov_b32_e32 v3, s3
; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
; GFX11-NEXT: s_endpgm
main_body:
%load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
%bitcast = bitcast <4 x i32> %load to <4 x float>
%x = extractelement <4 x float> %bitcast, i32 0
%y = extractelement <4 x float> %bitcast, i32 1
%z = extractelement <4 x float> %bitcast, i32 2
%w = extractelement <4 x float> %bitcast, i32 3
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
ret void
}
; <4 x i32> variant with a non-`inreg` %index.
; The checks expect a four-dword buffer_load with `offen` using v0 as the offset
; instead of an s_buffer_load.
define amdgpu_ps void @s_buffer_loadx4_index_divergent(<4 x i32> inreg %desc, i32 %index) {
; GFX678910-LABEL: s_buffer_loadx4_index_divergent:
; GFX678910: ; %bb.0: ; %main_body
; GFX678910-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
; GFX678910-NEXT: s_waitcnt vmcnt(0)
; GFX678910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
; GFX678910-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_loadx4_index_divergent:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
; GFX11-NEXT: s_endpgm
main_body:
%load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
%bitcast = bitcast <4 x i32> %load to <4 x float>
%x = extractelement <4 x float> %bitcast, i32 0
%y = extractelement <4 x float> %bitcast, i32 1
%z = extractelement <4 x float> %bitcast, i32 2
%w = extractelement <4 x float> %bitcast, i32 3
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
ret void
}
; Two separate i32 scalar buffer loads from the same descriptor at the constant
; byte offsets 4 and 8.
; The checks expect a single two-dword scalar load (dwordx2 / b64) at the lower
; offset, printed as 0x1 for the GFX6/GFX7 runs and 0x4 for GFX8 and newer.
define amdgpu_ps void @s_buffer_load_imm_mergex2(<4 x i32> inreg %desc) {
; GFX67-LABEL: s_buffer_load_imm_mergex2:
; GFX67: ; %bb.0: ; %main_body
; GFX67-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x1
; GFX67-NEXT: s_waitcnt lgkmcnt(0)
; GFX67-NEXT: v_mov_b32_e32 v0, s0
; GFX67-NEXT: v_mov_b32_e32 v1, s1
; GFX67-NEXT: exp mrt0 v0, v1, v0, v0 done vm
; GFX67-NEXT: s_endpgm
;
; GFX8910-LABEL: s_buffer_load_imm_mergex2:
; GFX8910: ; %bb.0: ; %main_body
; GFX8910-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x4
; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
; GFX8910-NEXT: v_mov_b32_e32 v0, s0
; GFX8910-NEXT: v_mov_b32_e32 v1, s1
; GFX8910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
; GFX8910-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_load_imm_mergex2:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_buffer_load_b64 s[0:1], s[0:3], 0x4
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: v_mov_b32_e32 v1, s1
; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
; GFX11-NEXT: s_endpgm
main_body:
%load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
%load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
%x = bitcast i32 %load0 to float
%y = bitcast i32 %load1 to float
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
ret void
}
; Four separate i32 scalar buffer loads from the same descriptor at the constant
; byte offsets 8, 12, 16 and 20.
; The checks expect a single four-dword scalar load (dwordx4 / b128) at the
; lowest offset, printed as 0x2 for the GFX6/GFX7 runs and 0x8 for GFX8 and newer.
define amdgpu_ps void @s_buffer_load_imm_mergex4(<4 x i32> inreg %desc) {
; GFX67-LABEL: s_buffer_load_imm_mergex4:
; GFX67: ; %bb.0: ; %main_body
; GFX67-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x2
; GFX67-NEXT: s_waitcnt lgkmcnt(0)
; GFX67-NEXT: v_mov_b32_e32 v0, s0
; GFX67-NEXT: v_mov_b32_e32 v1, s1
; GFX67-NEXT: v_mov_b32_e32 v2, s2
; GFX67-NEXT: v_mov_b32_e32 v3, s3
; GFX67-NEXT: exp mrt0 v0, v1, v2, v3 done vm
; GFX67-NEXT: s_endpgm
;
; GFX8910-LABEL: s_buffer_load_imm_mergex4:
; GFX8910: ; %bb.0: ; %main_body
; GFX8910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x8
; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
; GFX8910-NEXT: v_mov_b32_e32 v0, s0
; GFX8910-NEXT: v_mov_b32_e32 v1, s1
; GFX8910-NEXT: v_mov_b32_e32 v2, s2
; GFX8910-NEXT: v_mov_b32_e32 v3, s3
; GFX8910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
; GFX8910-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_load_imm_mergex4:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_buffer_load_b128 s[0:3], s[0:3], 0x8
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: v_mov_b32_e32 v1, s1
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: v_mov_b32_e32 v3, s3
; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done
; GFX11-NEXT: s_endpgm
main_body:
%load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
%load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 12, i32 0)
%load2 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 16, i32 0)
%load3 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 20, i32 0)
%x = bitcast i32 %load0 to float
%y = bitcast i32 %load1 to float
%z = bitcast i32 %load2 to float
%w = bitcast i32 %load3 to float
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
ret void
}
; External global in addrspace(1); the function below stores to it.
@gv = external addrspace(1) global i32
; The offset is built across two basic blocks: main_body computes
; %tmp = %index << 4 and stores it to @gv, then bb1 computes %tmp | 8 and uses it
; as the offset of an i32 load from a non-`inreg` index.
; The checks expect the `or` to remain a separate v_or_b32 feeding a buffer_load
; with `offen` (no immediate offset on the load).
; Among the functions shown here this is the only one that stores to memory, and
; the only one whose gfx1100 checks contain
; "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)" before s_endpgm.
define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) {
; GFX6-LABEL: s_buffer_load_index_across_bb:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_getpc_b64 s[4:5]
; GFX6-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX6-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_or_b32_e32 v0, 8, v0
; GFX6-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: exp mrt0 v0, v0, v0, v0 done vm
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: s_buffer_load_index_across_bb:
; GFX7: ; %bb.0: ; %main_body
; GFX7-NEXT: s_getpc_b64 s[4:5]
; GFX7-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX7-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s6, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX7-NEXT: v_or_b32_e32 v0, 8, v0
; GFX7-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: exp mrt0 v0, v0, v0, v0 done vm
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: s_buffer_load_index_across_bb:
; GFX8: ; %bb.0: ; %main_body
; GFX8-NEXT: s_getpc_b64 s[4:5]
; GFX8-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX8-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v1, s4
; GFX8-NEXT: v_mov_b32_e32 v2, s5
; GFX8-NEXT: flat_store_dword v[1:2], v0
; GFX8-NEXT: v_or_b32_e32 v0, 8, v0
; GFX8-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: exp mrt0 v0, v0, v0, v0 done vm
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: s_buffer_load_index_across_bb:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_store_dword v1, v0, s[4:5]
; GFX9-NEXT: v_or_b32_e32 v0, 8, v0
; GFX9-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: exp mrt0 v0, v0, v0, v0 done vm
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: s_buffer_load_index_across_bb:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_store_dword v1, v0, s[4:5]
; GFX10-NEXT: v_or_b32_e32 v0, 8, v0
; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: exp mrt0 v0, v0, v0, v0 done vm
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_load_index_across_bb:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: s_getpc_b64 s[4:5]
; GFX11-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX11-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
; GFX11-NEXT: v_or_b32_e32 v0, 8, v0
; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
main_body:
%tmp = shl i32 %index, 4
store i32 %tmp, ptr addrspace(1) @gv
br label %bb1
bb1: ; preds = %main_body
%tmp1 = or i32 %tmp, 8
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp1, i32 0)
%bitcast = bitcast i32 %load to float
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
ret void
}
; Like s_buffer_load_index_across_bb but without the store: main_body computes
; %tmp = %index << 4, and bb1 issues two i32 loads at %tmp | 8 and (%tmp | 8) | 4.
; The checks expect both loads to become one two-dword buffer_load with `offen`
; and an immediate "offset:8", with no separate v_or_b32.
define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) {
; GFX678910-LABEL: s_buffer_load_index_across_bb_merged:
; GFX678910: ; %bb.0: ; %main_body
; GFX678910-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX678910-NEXT: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen offset:8
; GFX678910-NEXT: s_waitcnt vmcnt(0)
; GFX678910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
; GFX678910-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_load_index_across_bb_merged:
; GFX11: ; %bb.0: ; %main_body
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:8
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: exp mrt0 v0, v1, v0, v0 done
; GFX11-NEXT: s_endpgm
main_body:
%tmp = shl i32 %index, 4
br label %bb1
bb1: ; preds = %main_body
%tmp1 = or i32 %tmp, 8
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp1, i32 0)
%tmp2 = or i32 %tmp1, 4
%load2 = tail call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp2, i32 0)
%bitcast = bitcast i32 %load to float
%bitcast2 = bitcast i32 %load2 to float
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float %bitcast2, float undef, float undef, i1 true, i1 true)
ret void
}
; Scalar buffer load with the constant offset -1; the loaded i32 is returned.
; Every target is expected to materialize -1 in s4 and use it as a register
; offset rather than an immediate. The tahiti (GFX6) checks also expect an
; "s_nop 3" between the s_mov_b32 and the load.
define amdgpu_ps i32 @s_buffer_load_imm_neg1(<4 x i32> inreg %desc) {
; GFX6-LABEL: s_buffer_load_imm_neg1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_nop 3
; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX78-LABEL: s_buffer_load_imm_neg1:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_mov_b32 s4, -1
; GFX78-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX78-NEXT: s_waitcnt lgkmcnt(0)
; GFX78-NEXT: ; return to shader part epilog
;
; GFX910-LABEL: s_buffer_load_imm_neg1:
; GFX910: ; %bb.0:
; GFX910-NEXT: s_mov_b32 s4, -1
; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
; GFX910-NEXT: s_waitcnt lgkmcnt(0)
; GFX910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_neg1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_mov_b32 s4, -1
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0)
ret i32 %load
}
; Scalar buffer load with the constant offset -4; the loaded i32 is returned.
; The hawaii (GFX7) checks expect the offset as the immediate 0x3fffffff; every
; other target is expected to materialize -4 in s4 and use a register offset
; (with an extra "s_nop 3" on tahiti).
define amdgpu_ps i32 @s_buffer_load_imm_neg4(<4 x i32> inreg %desc) {
; GFX6-LABEL: s_buffer_load_imm_neg4:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, -4
; GFX6-NEXT: s_nop 3
; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_buffer_load_imm_neg4:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fffffff
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_buffer_load_imm_neg4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_mov_b32 s4, -4
; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX910-LABEL: s_buffer_load_imm_neg4:
; GFX910: ; %bb.0:
; GFX910-NEXT: s_mov_b32 s4, -4
; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
; GFX910-NEXT: s_waitcnt lgkmcnt(0)
; GFX910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_neg4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_mov_b32 s4, -4
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0)
ret i32 %load
}
; Scalar buffer load with the constant offset -8; the loaded i32 is returned.
; The hawaii (GFX7) checks expect the offset as the immediate 0x3ffffffe; every
; other target is expected to materialize -8 in s4 and use a register offset
; (with an extra "s_nop 3" on tahiti).
define amdgpu_ps i32 @s_buffer_load_imm_neg8(<4 x i32> inreg %desc) {
; GFX6-LABEL: s_buffer_load_imm_neg8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, -8
; GFX6-NEXT: s_nop 3
; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_buffer_load_imm_neg8:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffffffe
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_buffer_load_imm_neg8:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_mov_b32 s4, -8
; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX910-LABEL: s_buffer_load_imm_neg8:
; GFX910: ; %bb.0:
; GFX910-NEXT: s_mov_b32 s4, -8
; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
; GFX910-NEXT: s_waitcnt lgkmcnt(0)
; GFX910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_neg8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_mov_b32 s4, -8
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0)
ret i32 %load
}
; Scalar buffer load with the constant offset -2147483648 (only bit 31 set); the
; loaded i32 is returned.
; The hawaii (GFX7) checks expect the offset as the immediate 0x20000000; every
; other target is expected to build the value with "s_brev_b32 s4, 1" and use a
; register offset (with an extra "s_nop 3" on tahiti).
define amdgpu_ps i32 @s_buffer_load_imm_bit31(<4 x i32> inreg %desc) {
; GFX6-LABEL: s_buffer_load_imm_bit31:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_brev_b32 s4, 1
; GFX6-NEXT: s_nop 3
; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_buffer_load_imm_bit31:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x20000000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_buffer_load_imm_bit31:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_brev_b32 s4, 1
; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX910-LABEL: s_buffer_load_imm_bit31:
; GFX910: ; %bb.0:
; GFX910-NEXT: s_brev_b32 s4, 1
; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
; GFX910-NEXT: s_waitcnt lgkmcnt(0)
; GFX910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_bit31:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_brev_b32 s4, 1
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0)
ret i32 %load
}
; Scalar buffer load with the constant offset 1073741824 (only bit 30 set); the
; loaded i32 is returned.
; The hawaii (GFX7) checks expect the offset as the immediate 0x10000000; every
; other target is expected to materialize the value with "s_mov_b32 s4, 2.0"
; and use a register offset (with an extra "s_nop 3" on tahiti).
define amdgpu_ps i32 @s_buffer_load_imm_bit30(<4 x i32> inreg %desc) {
; GFX6-LABEL: s_buffer_load_imm_bit30:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, 2.0
; GFX6-NEXT: s_nop 3
; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_buffer_load_imm_bit30:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x10000000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_buffer_load_imm_bit30:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_mov_b32 s4, 2.0
; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX910-LABEL: s_buffer_load_imm_bit30:
; GFX910: ; %bb.0:
; GFX910-NEXT: s_mov_b32 s4, 2.0
; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
; GFX910-NEXT: s_waitcnt lgkmcnt(0)
; GFX910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_bit30:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_mov_b32 s4, 2.0
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 0)
ret i32 %load
}
; Scalar buffer load with the constant offset 536870912 (only bit 29 set); the
; loaded i32 is returned.
; The hawaii (GFX7) checks expect the offset as the immediate 0x8000000; every
; other target is expected to build the value with "s_brev_b32 s4, 4" and use a
; register offset (with an extra "s_nop 3" on tahiti).
define amdgpu_ps i32 @s_buffer_load_imm_bit29(<4 x i32> inreg %desc) {
; GFX6-LABEL: s_buffer_load_imm_bit29:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_brev_b32 s4, 4
; GFX6-NEXT: s_nop 3
; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_buffer_load_imm_bit29:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x8000000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_buffer_load_imm_bit29:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_brev_b32 s4, 4
; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX910-LABEL: s_buffer_load_imm_bit29:
; GFX910: ; %bb.0:
; GFX910-NEXT: s_brev_b32 s4, 4
; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
; GFX910-NEXT: s_waitcnt lgkmcnt(0)
; GFX910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_bit29:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_brev_b32 s4, 4
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0)
ret i32 %load
}
; Byte offset with only bit 21 set (i32 2097152 = 0x200000).
; The hawaii run encodes it as the immediate 0x80000 (the byte offset shifted
; right by two). All other runs load 0x200000 into s4 with s_mov_b32 and use
; the register as the offset operand.
define amdgpu_ps i32 @s_buffer_load_imm_bit21(<4 x i32> inreg %desc) {
; GFX6-LABEL: s_buffer_load_imm_bit21:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, 0x200000
; GFX6-NEXT: s_nop 3
; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_buffer_load_imm_bit21:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x80000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_buffer_load_imm_bit21:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_mov_b32 s4, 0x200000
; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX910-LABEL: s_buffer_load_imm_bit21:
; GFX910: ; %bb.0:
; GFX910-NEXT: s_mov_b32 s4, 0x200000
; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
; GFX910-NEXT: s_waitcnt lgkmcnt(0)
; GFX910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_bit21:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_mov_b32 s4, 0x200000
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0)
ret i32 %load
}
; Byte offset with only bit 20 set (i32 1048576 = 0x100000).
; The hawaii run encodes it as the immediate 0x40000 (the byte offset shifted
; right by two). All other runs load 0x100000 into s4 with s_mov_b32 and use
; the register as the offset operand.
define amdgpu_ps i32 @s_buffer_load_imm_bit20(<4 x i32> inreg %desc) {
; GFX6-LABEL: s_buffer_load_imm_bit20:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, 0x100000
; GFX6-NEXT: s_nop 3
; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_buffer_load_imm_bit20:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x40000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_buffer_load_imm_bit20:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_mov_b32 s4, 0x100000
; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX910-LABEL: s_buffer_load_imm_bit20:
; GFX910: ; %bb.0:
; GFX910-NEXT: s_mov_b32 s4, 0x100000
; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
; GFX910-NEXT: s_waitcnt lgkmcnt(0)
; GFX910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_bit20:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_mov_b32 s4, 0x100000
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0)
ret i32 %load
}
; Negative byte offset i32 -1048576, i.e. the bit pattern 0xfff00000.
; The hawaii run encodes it as the immediate 0x3ffc0000, which is that bit
; pattern shifted right by two as an unsigned value. All other runs load
; 0xfff00000 into s4 with s_mov_b32 and use the register as the offset.
define amdgpu_ps i32 @s_buffer_load_imm_neg_bit20(<4 x i32> inreg %desc) {
; GFX6-LABEL: s_buffer_load_imm_neg_bit20:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, 0xfff00000
; GFX6-NEXT: s_nop 3
; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_buffer_load_imm_neg_bit20:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffc0000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_buffer_load_imm_neg_bit20:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_mov_b32 s4, 0xfff00000
; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX910-LABEL: s_buffer_load_imm_neg_bit20:
; GFX910: ; %bb.0:
; GFX910-NEXT: s_mov_b32 s4, 0xfff00000
; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
; GFX910-NEXT: s_waitcnt lgkmcnt(0)
; GFX910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_neg_bit20:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_mov_b32 s4, 0xfff00000
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0)
ret i32 %load
}
; Byte offset with only bit 19 set (i32 524288 = 0x80000).
; Unlike the bit 20 and bit 21 tests, the tonga, gfx900, gfx1010 and gfx1100
; runs all fold this offset directly into the load as the immediate 0x80000.
; The hawaii run uses the immediate 0x20000 (the byte offset shifted right by
; two), and only the tahiti run still goes through s4.
define amdgpu_ps i32 @s_buffer_load_imm_bit19(<4 x i32> inreg %desc) {
; GFX6-LABEL: s_buffer_load_imm_bit19:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, 0x80000
; GFX6-NEXT: s_nop 3
; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_buffer_load_imm_bit19:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x20000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8910-LABEL: s_buffer_load_imm_bit19:
; GFX8910: ; %bb.0:
; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x80000
; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
; GFX8910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_bit19:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x80000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
ret i32 %load
}
; Negative byte offset i32 -524288, i.e. the bit pattern 0xfff80000.
; Although positive 0x80000 is folded as an immediate on the newer targets
; (see s_buffer_load_imm_bit19), the negated value is not. Every run except
; hawaii loads 0xfff80000 into s4 first. The hawaii run uses the immediate
; 0x3ffe0000, which is the bit pattern shifted right by two as unsigned.
define amdgpu_ps i32 @s_buffer_load_imm_neg_bit19(<4 x i32> inreg %desc) {
; GFX6-LABEL: s_buffer_load_imm_neg_bit19:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, 0xfff80000
; GFX6-NEXT: s_nop 3
; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_buffer_load_imm_neg_bit19:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x3ffe0000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_buffer_load_imm_neg_bit19:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_mov_b32 s4, 0xfff80000
; GFX8-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX910-LABEL: s_buffer_load_imm_neg_bit19:
; GFX910: ; %bb.0:
; GFX910-NEXT: s_mov_b32 s4, 0xfff80000
; GFX910-NEXT: s_buffer_load_dword s0, s[0:3], s4 offset:0x0
; GFX910-NEXT: s_waitcnt lgkmcnt(0)
; GFX910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_neg_bit19:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_mov_b32 s4, 0xfff80000
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
ret i32 %load
}
; Byte offset 255 (0xff), which is not a multiple of four.
; The tahiti and hawaii runs both materialize 0xff in s4 with s_movk_i32
; rather than using an immediate. The tonga, gfx900, gfx1010 and gfx1100 runs
; fold the byte offset 0xff directly into the load.
define amdgpu_ps i32 @s_buffer_load_imm_255(<4 x i32> inreg %desc) {
; GFX6-LABEL: s_buffer_load_imm_255:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_movk_i32 s4, 0xff
; GFX6-NEXT: s_nop 3
; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_buffer_load_imm_255:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_movk_i32 s4, 0xff
; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8910-LABEL: s_buffer_load_imm_255:
; GFX8910: ; %bb.0:
; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0xff
; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
; GFX8910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_255:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0xff
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 255, i32 0)
ret i32 %load
}
; Byte offset 256 (0x100), a multiple of four.
; The tahiti and hawaii runs share one expectation with the immediate 0x40
; (256 divided by four). The tonga, gfx900, gfx1010 and gfx1100 runs use the
; byte offset 0x100 as the immediate.
define amdgpu_ps i32 @s_buffer_load_imm_256(<4 x i32> inreg %desc) {
; GFX67-LABEL: s_buffer_load_imm_256:
; GFX67: ; %bb.0:
; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0x40
; GFX67-NEXT: s_waitcnt lgkmcnt(0)
; GFX67-NEXT: ; return to shader part epilog
;
; GFX8910-LABEL: s_buffer_load_imm_256:
; GFX8910: ; %bb.0:
; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x100
; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
; GFX8910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_256:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x100
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 256, i32 0)
ret i32 %load
}
; Byte offset 1016 (0x3f8), a multiple of four.
; The tahiti and hawaii runs share one expectation with the immediate 0xfe
; (1016 divided by four). The tonga, gfx900, gfx1010 and gfx1100 runs use the
; byte offset 0x3f8 as the immediate.
define amdgpu_ps i32 @s_buffer_load_imm_1016(<4 x i32> inreg %desc) {
; GFX67-LABEL: s_buffer_load_imm_1016:
; GFX67: ; %bb.0:
; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0xfe
; GFX67-NEXT: s_waitcnt lgkmcnt(0)
; GFX67-NEXT: ; return to shader part epilog
;
; GFX8910-LABEL: s_buffer_load_imm_1016:
; GFX8910: ; %bb.0:
; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x3f8
; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
; GFX8910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_1016:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3f8
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1016, i32 0)
ret i32 %load
}
; Byte offset 1020 (0x3fc), a multiple of four.
; The tahiti and hawaii runs share one expectation with the immediate 0xff
; (1020 divided by four). In the s_buffer_load_imm_1024 test the tahiti run
; no longer uses an immediate, so this looks like the last offset that fits
; there (NOTE(review), inferred from the neighbouring tests). The newer
; targets use the byte offset 0x3fc as the immediate.
define amdgpu_ps i32 @s_buffer_load_imm_1020(<4 x i32> inreg %desc) {
; GFX67-LABEL: s_buffer_load_imm_1020:
; GFX67: ; %bb.0:
; GFX67-NEXT: s_buffer_load_dword s0, s[0:3], 0xff
; GFX67-NEXT: s_waitcnt lgkmcnt(0)
; GFX67-NEXT: ; return to shader part epilog
;
; GFX8910-LABEL: s_buffer_load_imm_1020:
; GFX8910: ; %bb.0:
; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fc
; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
; GFX8910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_1020:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3fc
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1020, i32 0)
ret i32 %load
}
; Byte offset 1021 (0x3fd), which is not a multiple of four.
; The tahiti and hawaii runs both materialize 0x3fd in s4 with s_movk_i32
; rather than using an immediate. The tonga, gfx900, gfx1010 and gfx1100 runs
; fold the byte offset 0x3fd directly into the load.
define amdgpu_ps i32 @s_buffer_load_imm_1021(<4 x i32> inreg %desc) {
; GFX6-LABEL: s_buffer_load_imm_1021:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_movk_i32 s4, 0x3fd
; GFX6-NEXT: s_nop 3
; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_buffer_load_imm_1021:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_movk_i32 s4, 0x3fd
; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8910-LABEL: s_buffer_load_imm_1021:
; GFX8910: ; %bb.0:
; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x3fd
; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
; GFX8910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_1021:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x3fd
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1021, i32 0)
ret i32 %load
}
; Byte offset 1024 (0x400), a multiple of four.
; Here the tahiti and hawaii expectations diverge. The tahiti run moves 0x400
; into s4 with s_movk_i32, while the hawaii run still uses an immediate, 0x100
; (1024 divided by four). The tonga, gfx900, gfx1010 and gfx1100 runs use the
; byte offset 0x400 as the immediate.
define amdgpu_ps i32 @s_buffer_load_imm_1024(<4 x i32> inreg %desc) {
; GFX6-LABEL: s_buffer_load_imm_1024:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_movk_i32 s4, 0x400
; GFX6-NEXT: s_nop 3
; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_buffer_load_imm_1024:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x100
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8910-LABEL: s_buffer_load_imm_1024:
; GFX8910: ; %bb.0:
; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x400
; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
; GFX8910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_1024:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x400
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1024, i32 0)
ret i32 %load
}
; Byte offset 1025 (0x401), which is not a multiple of four.
; The tahiti and hawaii runs both materialize 0x401 in s4 with s_movk_i32
; rather than using an immediate. The tonga, gfx900, gfx1010 and gfx1100 runs
; fold the byte offset 0x401 directly into the load.
define amdgpu_ps i32 @s_buffer_load_imm_1025(<4 x i32> inreg %desc) {
; GFX6-LABEL: s_buffer_load_imm_1025:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_movk_i32 s4, 0x401
; GFX6-NEXT: s_nop 3
; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_buffer_load_imm_1025:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_movk_i32 s4, 0x401
; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8910-LABEL: s_buffer_load_imm_1025:
; GFX8910: ; %bb.0:
; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x401
; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
; GFX8910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_1025:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x401
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1025, i32 0)
ret i32 %load
}
; Byte offset 1028 (0x404), the next multiple of four after 1024.
; This test previously passed i32 1024 to the intrinsic, which made it an
; exact duplicate of s_buffer_load_imm_1024 and left offset 1028 untested.
; Expected shapes follow the 1024 test. The tahiti run moves the byte offset
; into s4, the hawaii run uses the immediate 0x101 (1028 divided by four), and
; the tonga, gfx900, gfx1010 and gfx1100 runs use the immediate 0x404.
; NOTE(review) the assertions below were adjusted by hand to match the new
; offset, so rerun utils/update_llc_test_checks.py to confirm them.
define amdgpu_ps i32 @s_buffer_load_imm_1028(<4 x i32> inreg %desc) {
; GFX6-LABEL: s_buffer_load_imm_1028:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_movk_i32 s4, 0x404
; GFX6-NEXT: s_nop 3
; GFX6-NEXT: s_buffer_load_dword s0, s[0:3], s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: s_buffer_load_imm_1028:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_buffer_load_dword s0, s[0:3], 0x101
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8910-LABEL: s_buffer_load_imm_1028:
; GFX8910: ; %bb.0:
; GFX8910-NEXT: s_buffer_load_dword s0, s[0:3], 0x404
; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
; GFX8910-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_buffer_load_imm_1028:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_buffer_load_b32 s0, s[0:3], 0x404
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1028, i32 0)
ret i32 %load
}
; Intrinsic declarations. The first is the f32 export intrinsic; the rest are
; the scalar buffer load intrinsic for i32 and for 2, 3 and 4 element i32
; vectors. Each load variant takes a <4 x i32> descriptor and two i32 operands.
; In the calls above the first i32 is the byte offset under test and the
; second is always 0.
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32)
declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX6789: {{.*}}
; GFX789: {{.*}}
; GFX89: {{.*}}