[AMDGPU][NFC] Replace gfx940 and gfx941 with gfx942 in llvm/test gfx940 and gfx941 are no longer supported. This is one of a series of PRs to remove them from the code base. This PR uses gfx942 instead of gfx940 and gfx941 in the test RUN-lines (unless there is already a RUN-line for gfx942). The only notable difference in the test output is that gfx942 does not force the use of sc0 and sc1 on stores while gfx940 and gfx941 do (cf. https://reviews.llvm.org/D149986). For SWDEV-512631
197 lines
10 KiB
LLVM
197 lines
10 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s
|
|
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s
|
|
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s
|
|
|
|
; ERR-SDAG: LLVM ERROR: Do not know how to expand this operator's operand!
|
|
; ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.ptr.buffer.load.lds),
|
|
|
|
declare void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %vindex, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)
|
|
|
|
;---------------------------------------------------------------------y
|
|
; dwordx3
|
|
;---------------------------------------------------------------------
|
|
|
|
define amdgpu_ps float @buffer_load_lds_dwordx3(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
|
|
; GFX950-SDAG-LABEL: buffer_load_lds_dwordx3:
|
|
; GFX950-SDAG: ; %bb.0:
|
|
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 8
|
|
; GFX950-SDAG-NEXT: s_mov_b32 m0, s4
|
|
; GFX950-SDAG-NEXT: s_nop 0
|
|
; GFX950-SDAG-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen lds
|
|
; GFX950-SDAG-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:4 sc0 lds
|
|
; GFX950-SDAG-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:8 nt lds
|
|
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX950-SDAG-NEXT: ds_read_b32 v0, v0
|
|
; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX950-SDAG-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX950-GISEL-LABEL: buffer_load_lds_dwordx3:
|
|
; GFX950-GISEL: ; %bb.0:
|
|
; GFX950-GISEL-NEXT: s_mov_b32 m0, s4
|
|
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 8
|
|
; GFX950-GISEL-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen lds
|
|
; GFX950-GISEL-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:4 sc0 lds
|
|
; GFX950-GISEL-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:8 nt lds
|
|
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX950-GISEL-NEXT: ds_read_b32 v0, v0
|
|
; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX950-GISEL-NEXT: ; return to shader part epilog
|
|
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 8, i32 0, i32 0, i32 0, i32 0)
|
|
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 8, i32 0, i32 0, i32 4, i32 1)
|
|
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 8, i32 0, i32 0, i32 8, i32 2)
|
|
%res = load float, ptr addrspace(3) %lds
|
|
ret float %res
|
|
}
|
|
|
|
define amdgpu_ps void @buffer_load_lds_dwordx3_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex) {
|
|
; GFX950-LABEL: buffer_load_lds_dwordx3_imm_offset:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_mov_b32 m0, s4
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:2048 lds
|
|
; GFX950-NEXT: s_endpgm
|
|
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 0, i32 0, i32 2048, i32 0)
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_ps void @buffer_load_lds_dwordx3_v_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset) {
|
|
; GFX950-LABEL: buffer_load_lds_dwordx3_v_offset:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_mov_b32 m0, s4
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: buffer_load_dwordx3 v[0:1], s[0:3], 0 idxen offen lds
|
|
; GFX950-NEXT: s_endpgm
|
|
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 %voffset, i32 0, i32 0, i32 0)
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_ps void @buffer_load_lds_dwordx3_s_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 inreg %soffset) {
|
|
; GFX950-LABEL: buffer_load_lds_dwordx3_s_offset:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_mov_b32 m0, s4
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: buffer_load_dwordx3 v0, s[0:3], s5 idxen lds
|
|
; GFX950-NEXT: s_endpgm
|
|
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 0, i32 %soffset, i32 0, i32 0)
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_ps void @buffer_load_lds_dwordx3_vs_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
|
; GFX950-LABEL: buffer_load_lds_dwordx3_vs_offset:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_mov_b32 m0, s4
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: buffer_load_dwordx3 v[0:1], s[0:3], s5 idxen offen lds
|
|
; GFX950-NEXT: s_endpgm
|
|
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 %voffset, i32 %soffset, i32 0, i32 0)
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_ps void @buffer_load_lds_dwordx3_vs_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
|
; GFX950-LABEL: buffer_load_lds_dwordx3_vs_imm_offset:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_mov_b32 m0, s4
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: buffer_load_dwordx3 v[0:1], s[0:3], s5 idxen offen offset:2048 lds
|
|
; GFX950-NEXT: s_endpgm
|
|
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 %voffset, i32 %soffset, i32 2048, i32 0)
|
|
ret void
|
|
}
|
|
|
|
;---------------------------------------------------------------------y
|
|
; dwordx4
|
|
;---------------------------------------------------------------------
|
|
|
|
define amdgpu_ps float @buffer_load_lds_dwordx4(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
|
|
; GFX950-SDAG-LABEL: buffer_load_lds_dwordx4:
|
|
; GFX950-SDAG: ; %bb.0:
|
|
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 8
|
|
; GFX950-SDAG-NEXT: s_mov_b32 m0, s4
|
|
; GFX950-SDAG-NEXT: s_nop 0
|
|
; GFX950-SDAG-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen lds
|
|
; GFX950-SDAG-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:4 sc0 lds
|
|
; GFX950-SDAG-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:8 nt lds
|
|
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX950-SDAG-NEXT: ds_read_b32 v0, v0
|
|
; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX950-SDAG-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX950-GISEL-LABEL: buffer_load_lds_dwordx4:
|
|
; GFX950-GISEL: ; %bb.0:
|
|
; GFX950-GISEL-NEXT: s_mov_b32 m0, s4
|
|
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 8
|
|
; GFX950-GISEL-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen lds
|
|
; GFX950-GISEL-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:4 sc0 lds
|
|
; GFX950-GISEL-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:8 nt lds
|
|
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX950-GISEL-NEXT: ds_read_b32 v0, v0
|
|
; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX950-GISEL-NEXT: ; return to shader part epilog
|
|
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 8, i32 0, i32 0, i32 0, i32 0)
|
|
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 8, i32 0, i32 0, i32 4, i32 1)
|
|
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 8, i32 0, i32 0, i32 8, i32 2)
|
|
%res = load float, ptr addrspace(3) %lds
|
|
ret float %res
|
|
}
|
|
|
|
define amdgpu_ps void @buffer_load_lds_dwordx4_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex) {
|
|
; GFX950-LABEL: buffer_load_lds_dwordx4_imm_offset:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_mov_b32 m0, s4
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:2048 lds
|
|
; GFX950-NEXT: s_endpgm
|
|
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 0, i32 0, i32 2048, i32 0)
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_ps void @buffer_load_lds_dwordx4_v_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset) {
|
|
; GFX950-LABEL: buffer_load_lds_dwordx4_v_offset:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_mov_b32 m0, s4
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: buffer_load_dwordx4 v[0:1], s[0:3], 0 idxen offen lds
|
|
; GFX950-NEXT: s_endpgm
|
|
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 %voffset, i32 0, i32 0, i32 0)
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_ps void @buffer_load_lds_dwordx4_s_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 inreg %soffset) {
|
|
; GFX950-LABEL: buffer_load_lds_dwordx4_s_offset:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_mov_b32 m0, s4
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: buffer_load_dwordx4 v0, s[0:3], s5 idxen lds
|
|
; GFX950-NEXT: s_endpgm
|
|
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 0, i32 %soffset, i32 0, i32 0)
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_ps void @buffer_load_lds_dwordx4_vs_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
|
; GFX950-LABEL: buffer_load_lds_dwordx4_vs_offset:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_mov_b32 m0, s4
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: buffer_load_dwordx4 v[0:1], s[0:3], s5 idxen offen lds
|
|
; GFX950-NEXT: s_endpgm
|
|
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 %voffset, i32 %soffset, i32 0, i32 0)
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_ps void @buffer_load_lds_dwordx4_vs_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
|
|
; GFX950-LABEL: buffer_load_lds_dwordx4_vs_imm_offset:
|
|
; GFX950: ; %bb.0:
|
|
; GFX950-NEXT: s_mov_b32 m0, s4
|
|
; GFX950-NEXT: s_nop 0
|
|
; GFX950-NEXT: buffer_load_dwordx4 v[0:1], s[0:3], s5 idxen offen offset:2048 lds
|
|
; GFX950-NEXT: s_endpgm
|
|
call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 %voffset, i32 %soffset, i32 2048, i32 0)
|
|
ret void
|
|
}
|