It is documented that immarg is only valid on intrinsic declarations, although the verifier also tolerates it on intrinsic calls. This patch updates tests that are not specifically testing the behavior of the IR parser or verifier.
104 lines
5.5 KiB
LLVM
104 lines
5.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN

; This IR is slightly modified from a real case to keep it concise.

; Pixel shader reduced from a real case. It loads four values with
; llvm.amdgcn.lds.param.load, interpolates each of them with
; interp.inreg.p10 / interp.inreg.p2 using %InterpCenter, swizzles the results
; across lanes with mov.dpp8, and exports them to targets 21 and 22, which the
; expected output below prints as dual_src_blend0 and dual_src_blend1.
define amdgpu_ps void @_amdgpu_ps_main(i32 inreg %PrimMask, <2 x float> %InterpCenter) #0 {
; GCN-LABEL: _amdgpu_ps_main:
; GCN: ; %bb.0: ; %.entry
; GCN-NEXT: s_mov_b32 s1, exec_lo
; GCN-NEXT: s_wqm_b32 exec_lo, exec_lo
; GCN-NEXT: s_mov_b32 m0, s0
; GCN-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v3, v0
; GCN-NEXT: lds_param_load v4, attr1.x wait_vdst:15
; GCN-NEXT: lds_param_load v5, attr1.y wait_vdst:15
; GCN-NEXT: lds_param_load v6, attr1.z wait_vdst:15
; GCN-NEXT: lds_param_load v7, attr1.w wait_vdst:15
; GCN-NEXT: v_mbcnt_lo_u32_b32 v8, -1, 0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4)
; GCN-NEXT: v_mbcnt_hi_u32_b32 v8, -1, v8
; GCN-NEXT: v_interp_p10_f32 v9, v5, v3, v5 wait_exp:2
; GCN-NEXT: v_interp_p10_f32 v11, v6, v3, v6 wait_exp:1
; GCN-NEXT: v_interp_p10_f32 v10, v7, v3, v7 wait_exp:0
; GCN-NEXT: v_interp_p10_f32 v3, v4, v3, v4 wait_exp:7
; GCN-NEXT: v_interp_p2_f32 v5, v5, v2, v9 wait_exp:7
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GCN-NEXT: v_interp_p2_f32 v6, v6, v2, v11 wait_exp:7
; GCN-NEXT: v_interp_p2_f32 v7, v7, v2, v10 wait_exp:7
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GCN-NEXT: v_interp_p2_f32 v2, v4, v2, v3 wait_exp:7
; GCN-NEXT: v_mov_b32_dpp v5, v5 dpp8:[1,0,3,2,5,4,7,6]
; GCN-NEXT: v_and_b32_e32 v8, 1, v8
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GCN-NEXT: v_mov_b32_dpp v7, v7 dpp8:[1,0,3,2,5,4,7,6]
; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GCN-NEXT: v_dual_cndmask_b32 v3, v5, v6 :: v_dual_cndmask_b32 v4, v6, v5
; GCN-NEXT: v_dual_cndmask_b32 v5, v2, v7 :: v_dual_cndmask_b32 v2, v7, v2
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GCN-NEXT: v_mov_b32_dpp v4, v4 dpp8:[1,0,3,2,5,4,7,6]
; GCN-NEXT: v_mov_b32_dpp v5, v5 dpp8:[1,0,3,2,5,4,7,6]
; GCN-NEXT: s_mov_b32 exec_lo, s1
; GCN-NEXT: exp dual_src_blend0 v3, v2, off, off
; GCN-NEXT: exp dual_src_blend1 v4, v5, off, off done
; GCN-NEXT: s_endpgm
.entry:
  ; Element 0 feeds every p10 call, element 1 feeds every p2 call.
  %InterpCenter.i0 = extractelement <2 x float> %InterpCenter, i64 0
  %InterpCenter.i1 = extractelement <2 x float> %InterpCenter, i64 1

  ; Four parameter loads that differ only in the first immediate (0..3); the
  ; expected output prints them as attr1.x, attr1.y, attr1.z and attr1.w.
  ; NOTE(review): the "s_mov_b32 m0, s0" in the expected output is presumably
  ; the %PrimMask operand being placed in m0 - confirm.
  %i6 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %PrimMask)
  %i7 = call float @llvm.amdgcn.lds.param.load(i32 1, i32 1, i32 %PrimMask)
  %i8 = call float @llvm.amdgcn.lds.param.load(i32 2, i32 1, i32 %PrimMask)
  %i9 = call float @llvm.amdgcn.lds.param.load(i32 3, i32 1, i32 %PrimMask)

  ; Two-step interpolation of each loaded value: p10 takes the loaded value
  ; twice plus %InterpCenter.i0, and p2 combines the loaded value,
  ; %InterpCenter.i1 and the p10 result.
  %i14 = call float @llvm.amdgcn.interp.inreg.p10(float %i8, float %InterpCenter.i0, float %i8)
  %i15 = call float @llvm.amdgcn.interp.inreg.p2(float %i8, float %InterpCenter.i1, float %i14)

  %i16 = call float @llvm.amdgcn.interp.inreg.p10(float %i7, float %InterpCenter.i0, float %i7)
  %i17 = call float @llvm.amdgcn.interp.inreg.p2(float %i7, float %InterpCenter.i1, float %i16)

  %i18 = call float @llvm.amdgcn.interp.inreg.p10(float %i6, float %InterpCenter.i0, float %i6)
  %i19 = call float @llvm.amdgcn.interp.inreg.p2(float %i6, float %InterpCenter.i1, float %i18)

  %i20 = call float @llvm.amdgcn.interp.inreg.p10(float %i9, float %InterpCenter.i0, float %i9)
  %i21 = call float @llvm.amdgcn.interp.inreg.p2(float %i9, float %InterpCenter.i1, float %i20)

  ; %.not is true when bit 0 of the mbcnt.hi(mbcnt.lo) result is clear.
  ; NOTE(review): with an all-ones mask this is presumably the parity of the
  ; lane index - confirm against the intrinsic documentation.
  %i34 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
  %i35 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %i34)
  %i36 = and i32 %i35, 1
  %.not = icmp eq i32 %i36, 0

  ; The immediate 14570689 packs the 3-bit lane selectors [1,0,3,2,5,4,7,6]
  ; (matching the dpp8 operand in the expected output), so each lane reads the
  ; value of the other lane in its pair. The same select / dpp8 / select / dpp8
  ; pattern is applied to (%i15, %i17) here and to (%i19, %i21) below.
  %i37 = bitcast float %i15 to i32
  %i38 = bitcast float %i17 to i32
  %i39 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %i38, i32 14570689)
  %i40 = select i1 %.not, i32 %i37, i32 %i39
  %i41 = bitcast i32 %i40 to float
  %i42 = select i1 %.not, i32 %i39, i32 %i37
  %i43 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %i42, i32 14570689)
  %i44 = bitcast i32 %i43 to float

  %i45 = bitcast float %i19 to i32
  %i46 = bitcast float %i21 to i32
  %i47 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %i46, i32 14570689)
  %i48 = select i1 %.not, i32 %i45, i32 %i47
  %i49 = bitcast i32 %i48 to float
  %i50 = select i1 %.not, i32 %i47, i32 %i45
  %i51 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %i50, i32 14570689)
  %i52 = bitcast i32 %i51 to float

  ; Both exports pass 3 as the second operand and leave the last two data
  ; operands undef; the expected output prints those operands as "off". Only
  ; the second export is printed with "done".
  call void @llvm.amdgcn.exp.f32(i32 21, i32 3, float %i41, float %i49, float undef, float undef, i1 false, i1 true)
  call void @llvm.amdgcn.exp.f32(i32 22, i32 3, float %i44, float %i52, float undef, float undef, i1 true, i1 true)
  ret void
}

; Declarations of the AMDGPU intrinsics called by @_amdgpu_ps_main.
; immarg appears only here, on the declarations; the call sites pass plain
; constants for those parameters.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #2
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #2
declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32 immarg) #3
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #4
declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) #1
declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) #1
declare float @llvm.amdgcn.lds.param.load(i32 immarg, i32 immarg, i32) #1

; Attribute groups referenced in this file:
;   #0 - @_amdgpu_ps_main
;   #1 - llvm.amdgcn.interp.inreg.p10, llvm.amdgcn.interp.inreg.p2,
;        llvm.amdgcn.lds.param.load
;   #2 - llvm.amdgcn.mbcnt.lo, llvm.amdgcn.mbcnt.hi
;   #3 - llvm.amdgcn.mov.dpp8.i32
;   #4 - llvm.amdgcn.exp.f32
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable willreturn }
attributes #2 = { nounwind readnone willreturn }
attributes #3 = { convergent nounwind readnone willreturn }
attributes #4 = { inaccessiblememonly nounwind willreturn writeonly }