Call generateWaitcnt unconditionally at the end of SIInsertWaitcnts::insertWaitcntInBlock. Even if we don't need to generate a new waitcnt instruction, it has the effect of combining or removing redundant waitcnts that were already present. Tests show various small improvements in waitcnt placement.
1973 lines
73 KiB
LLVM
1973 lines
73 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,SI %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,GFX10-WAVE64 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX10-WAVE32 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
|
|
|
|
; Kill with a constant-true condition. For every tested target the expected
; output is a bare s_endpgm: no exec manipulation and no early-exit block.
define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
|
|
; GCN-LABEL: test_kill_depth_0_imm_pos:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_endpgm
|
|
; Constant true: the checks above expect this call to leave no code behind.
call void @llvm.amdgcn.kill(i1 true)
|
|
ret void
|
|
}
|
|
|
|
; Kill with a constant-false condition. The expected code clears exec with an
; andn2 of exec against itself, then branches on scc0 to an early-exit block
; that zeroes exec, issues a "done" export (exp null ... done vm, or
; exp mrt0 ... done on gfx1100) and ends the program.
define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
|
|
; WAVE64-LABEL: test_kill_depth_0_imm_neg:
|
|
; WAVE64: ; %bb.0:
|
|
; WAVE64-NEXT: s_andn2_b64 exec, exec, exec
|
|
; WAVE64-NEXT: s_cbranch_scc0 .LBB1_1
|
|
; WAVE64-NEXT: s_endpgm
|
|
; WAVE64-NEXT: .LBB1_1:
|
|
; WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; WAVE64-NEXT: exp null off, off, off, off done vm
|
|
; WAVE64-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg:
|
|
; GFX10-WAVE32: ; %bb.0:
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, exec_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB1_1
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
; GFX10-WAVE32-NEXT: .LBB1_1:
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: test_kill_depth_0_imm_neg:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, exec
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB1_1
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB1_1:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0 off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
; Constant false: the checks above expect exec to be cleared unconditionally.
call void @llvm.amdgcn.kill(i1 false)
|
|
ret void
|
|
}
|
|
|
|
; FIXME: Ideally only one early-exit would be emitted
|
|
; Two back-to-back constant-false kills. The expected code keeps a copy of exec
; in scalar registers (s[0:1], or s0 for wave32), emits one andn2 +
; s_cbranch_scc0 pair per kill, and has both branches target the same
; early-exit block (.LBB2_2).
define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
|
|
; WAVE64-LABEL: test_kill_depth_0_imm_neg_x2:
|
|
; WAVE64: ; %bb.0:
|
|
; WAVE64-NEXT: s_mov_b64 s[0:1], exec
|
|
; WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
|
; WAVE64-NEXT: s_cbranch_scc0 .LBB2_2
|
|
; WAVE64-NEXT: ; %bb.1:
|
|
; WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
|
; WAVE64-NEXT: s_cbranch_scc0 .LBB2_2
|
|
; WAVE64-NEXT: s_endpgm
|
|
; WAVE64-NEXT: .LBB2_2:
|
|
; WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; WAVE64-NEXT: exp null off, off, off, off done vm
|
|
; WAVE64-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg_x2:
|
|
; GFX10-WAVE32: ; %bb.0:
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB2_2
|
|
; GFX10-WAVE32-NEXT: ; %bb.1:
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB2_2
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
; GFX10-WAVE32-NEXT: .LBB2_2:
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: test_kill_depth_0_imm_neg_x2:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], exec
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB2_2
|
|
; GFX11-NEXT: ; %bb.1:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], exec
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB2_2
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB2_2:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0 off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
; Each of the two identical kills currently gets its own test-and-branch.
call void @llvm.amdgcn.kill(i1 false)
|
|
call void @llvm.amdgcn.kill(i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Kill on a per-lane condition (%x < 0.0). The expected code evaluates the
; inverse compare (v_cmp_ngt_f32 0, v0) into vcc, removes those lanes from exec
; with andn2, and branches on scc0 to the early-exit block.
define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
|
|
; WAVE64-LABEL: test_kill_depth_var:
|
|
; WAVE64: ; %bb.0:
|
|
; WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
|
; WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; WAVE64-NEXT: s_cbranch_scc0 .LBB3_1
|
|
; WAVE64-NEXT: s_endpgm
|
|
; WAVE64-NEXT: .LBB3_1:
|
|
; WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; WAVE64-NEXT: exp null off, off, off, off done vm
|
|
; WAVE64-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE32-LABEL: test_kill_depth_var:
|
|
; GFX10-WAVE32: ; %bb.0:
|
|
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB3_1
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
; GFX10-WAVE32-NEXT: .LBB3_1:
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: test_kill_depth_var:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB3_1
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB3_1:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0 off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
; Kill condition: the ordered less-than of %x against zero.
%cmp = fcmp olt float %x, 0.0
|
|
call void @llvm.amdgcn.kill(i1 %cmp)
|
|
ret void
|
|
}
|
|
|
|
; FIXME: Ideally only one early-exit would be emitted
|
|
; Two kills that use the very same condition value. The expected code evaluates
; v_cmp_ngt_f32 on v0 twice and emits two andn2 + s_cbranch_scc0 sequences on
; the saved exec copy, both targeting one shared early-exit block (.LBB4_2).
define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
|
|
; SI-LABEL: test_kill_depth_var_x2_same:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: s_mov_b64 s[0:1], exec
|
|
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
|
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
|
|
; SI-NEXT: s_cbranch_scc0 .LBB4_2
|
|
; SI-NEXT: ; %bb.1:
|
|
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
|
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
|
|
; SI-NEXT: s_cbranch_scc0 .LBB4_2
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB4_2:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_same:
|
|
; GFX10-WAVE64: ; %bb.0:
|
|
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB4_2
|
|
; GFX10-WAVE64-NEXT: ; %bb.1:
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB4_2
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
; GFX10-WAVE64-NEXT: .LBB4_2:
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_same:
|
|
; GFX10-WAVE32: ; %bb.0:
|
|
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB4_2
|
|
; GFX10-WAVE32-NEXT: ; %bb.1:
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
|
|
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB4_2
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
; GFX10-WAVE32-NEXT: .LBB4_2:
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: test_kill_depth_var_x2_same:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
|
; GFX11-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB4_2
|
|
; GFX11-NEXT: ; %bb.1:
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
|
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB4_2
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB4_2:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0 off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
; A single compare result feeds both kill calls below.
%cmp = fcmp olt float %x, 0.0
|
|
call void @llvm.amdgcn.kill(i1 %cmp)
|
|
call void @llvm.amdgcn.kill(i1 %cmp)
|
|
ret void
|
|
}
|
|
|
|
; FIXME: Ideally only one early-exit would be emitted
|
|
; Two kills on independent per-lane conditions (%x < 0.0, then %y < 0.0). The
; expected code compares v0 for the first kill and v1 for the second, with an
; andn2 + s_cbranch_scc0 after each compare and one shared early-exit block
; (.LBB5_2).
define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
|
|
; SI-LABEL: test_kill_depth_var_x2:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: s_mov_b64 s[0:1], exec
|
|
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
|
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
|
|
; SI-NEXT: s_cbranch_scc0 .LBB5_2
|
|
; SI-NEXT: ; %bb.1:
|
|
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
|
|
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
|
|
; SI-NEXT: s_cbranch_scc0 .LBB5_2
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB5_2:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2:
|
|
; GFX10-WAVE64: ; %bb.0:
|
|
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB5_2
|
|
; GFX10-WAVE64-NEXT: ; %bb.1:
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB5_2
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
; GFX10-WAVE64-NEXT: .LBB5_2:
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2:
|
|
; GFX10-WAVE32: ; %bb.0:
|
|
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB5_2
|
|
; GFX10-WAVE32-NEXT: ; %bb.1:
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
|
|
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v1
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB5_2
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
; GFX10-WAVE32-NEXT: .LBB5_2:
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: test_kill_depth_var_x2:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
|
; GFX11-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB5_2
|
|
; GFX11-NEXT: ; %bb.1:
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
|
|
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB5_2
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB5_2:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0 off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
; First kill: condition derived from %x.
%cmp.x = fcmp olt float %x, 0.0
|
|
call void @llvm.amdgcn.kill(i1 %cmp.x)
|
|
; Second kill: condition derived from %y.
%cmp.y = fcmp olt float %y, 0.0
|
|
call void @llvm.amdgcn.kill(i1 %cmp.y)
|
|
ret void
|
|
}
|
|
|
|
; Two kills separated by a side-effecting inline asm statement that defines the
; second condition's input in v7. The expected code keeps the asm block between
; the two kill sequences and compares v7 for the second kill; both branches
; target one shared early-exit block (.LBB6_2).
define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
|
|
; SI-LABEL: test_kill_depth_var_x2_instructions:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: s_mov_b64 s[0:1], exec
|
|
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
|
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
|
|
; SI-NEXT: s_cbranch_scc0 .LBB6_2
|
|
; SI-NEXT: ; %bb.1:
|
|
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; SI-NEXT: ;;#ASMSTART
|
|
; SI-NEXT: v_mov_b32_e64 v7, -1
|
|
; SI-NEXT: ;;#ASMEND
|
|
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
|
|
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
|
|
; SI-NEXT: s_cbranch_scc0 .LBB6_2
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB6_2:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_instructions:
|
|
; GFX10-WAVE64: ; %bb.0:
|
|
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB6_2
|
|
; GFX10-WAVE64-NEXT: ; %bb.1:
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; GFX10-WAVE64-NEXT: ;;#ASMSTART
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
|
|
; GFX10-WAVE64-NEXT: ;;#ASMEND
|
|
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB6_2
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
; GFX10-WAVE64-NEXT: .LBB6_2:
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_instructions:
|
|
; GFX10-WAVE32: ; %bb.0:
|
|
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB6_2
|
|
; GFX10-WAVE32-NEXT: ; %bb.1:
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
|
|
; GFX10-WAVE32-NEXT: ;;#ASMSTART
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
|
|
; GFX10-WAVE32-NEXT: ;;#ASMEND
|
|
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB6_2
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
; GFX10-WAVE32-NEXT: .LBB6_2:
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: test_kill_depth_var_x2_instructions:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
|
; GFX11-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB6_2
|
|
; GFX11-NEXT: ; %bb.1:
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: v_mov_b32_e64 v7, -1
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
|
|
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB6_2
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB6_2:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0 off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
; First kill: condition derived from the function argument.
%cmp.x = fcmp olt float %x, 0.0
|
|
call void @llvm.amdgcn.kill(i1 %cmp.x)
|
|
; The asm result is pinned to v7 by the "={v7}" output constraint.
%y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
|
|
%cmp.y = fcmp olt float %y, 0.0
|
|
call void @llvm.amdgcn.kill(i1 %cmp.y)
|
|
ret void
|
|
}
|
|
|
|
; FIXME: why does the skip depend on the asm length in the same block?
|
|
; Kill inside one arm of a branch on an inreg argument. Block %bb obtains its
; kill input from inline asm (a v_mov into v7 followed by ten v_nop_e64) and
; kills on it; both paths reach %exit, which returns 1.0. The expected code uses
; a scalar compare and branch on s0 for the IR branch, and a separate
; andn2 + s_cbranch_scc0 to the early-exit block (.LBB7_4) for the kill.
define amdgpu_ps float @test_kill_control_flow(i32 inreg %arg) #0 {
|
|
; SI-LABEL: test_kill_control_flow:
|
|
; SI: ; %bb.0: ; %entry
|
|
; SI-NEXT: s_cmp_lg_u32 s0, 0
|
|
; SI-NEXT: s_cbranch_scc0 .LBB7_2
|
|
; SI-NEXT: ; %bb.1: ; %exit
|
|
; SI-NEXT: v_mov_b32_e32 v0, 1.0
|
|
; SI-NEXT: s_branch .LBB7_5
|
|
; SI-NEXT: .LBB7_2: ; %bb
|
|
; SI-NEXT: s_mov_b64 s[2:3], exec
|
|
; SI-NEXT: ;;#ASMSTART
|
|
; SI-NEXT: v_mov_b32_e64 v7, -1
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: ;;#ASMEND
|
|
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
|
|
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
|
|
; SI-NEXT: s_cbranch_scc0 .LBB7_4
|
|
; SI-NEXT: ; %bb.3: ; %bb
|
|
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; SI-NEXT: v_mov_b32_e32 v0, 1.0
|
|
; SI-NEXT: s_branch .LBB7_5
|
|
; SI-NEXT: .LBB7_4:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB7_5:
|
|
;
|
|
; GFX10-WAVE64-LABEL: test_kill_control_flow:
|
|
; GFX10-WAVE64: ; %bb.0: ; %entry
|
|
; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB7_2
|
|
; GFX10-WAVE64-NEXT: ; %bb.1: ; %exit
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 1.0
|
|
; GFX10-WAVE64-NEXT: s_branch .LBB7_5
|
|
; GFX10-WAVE64-NEXT: .LBB7_2: ; %bb
|
|
; GFX10-WAVE64-NEXT: ;;#ASMSTART
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: ;;#ASMEND
|
|
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB7_4
|
|
; GFX10-WAVE64-NEXT: ; %bb.3: ; %bb
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 1.0
|
|
; GFX10-WAVE64-NEXT: s_branch .LBB7_5
|
|
; GFX10-WAVE64-NEXT: .LBB7_4:
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
; GFX10-WAVE64-NEXT: .LBB7_5:
|
|
;
|
|
; GFX10-WAVE32-LABEL: test_kill_control_flow:
|
|
; GFX10-WAVE32: ; %bb.0: ; %entry
|
|
; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB7_2
|
|
; GFX10-WAVE32-NEXT: ; %bb.1: ; %exit
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 1.0
|
|
; GFX10-WAVE32-NEXT: s_branch .LBB7_5
|
|
; GFX10-WAVE32-NEXT: .LBB7_2: ; %bb
|
|
; GFX10-WAVE32-NEXT: ;;#ASMSTART
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: ;;#ASMEND
|
|
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB7_4
|
|
; GFX10-WAVE32-NEXT: ; %bb.3: ; %bb
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 1.0
|
|
; GFX10-WAVE32-NEXT: s_branch .LBB7_5
|
|
; GFX10-WAVE32-NEXT: .LBB7_4:
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
; GFX10-WAVE32-NEXT: .LBB7_5:
|
|
;
|
|
; GFX11-LABEL: test_kill_control_flow:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_cmp_lg_u32 s0, 0
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB7_2
|
|
; GFX11-NEXT: ; %bb.1: ; %exit
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, 1.0
|
|
; GFX11-NEXT: s_branch .LBB7_5
|
|
; GFX11-NEXT: .LBB7_2: ; %bb
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: v_mov_b32_e64 v7, -1
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
|
|
; GFX11-NEXT: s_mov_b64 s[2:3], exec
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB7_4
|
|
; GFX11-NEXT: ; %bb.3: ; %bb
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, 1.0
|
|
; GFX11-NEXT: s_branch .LBB7_5
|
|
; GFX11-NEXT: .LBB7_4:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0 off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB7_5:
|
|
|
|
entry:
|
|
; Branch on the inreg argument: zero goes to %bb (the kill), anything else
; goes straight to %exit.
%cmp = icmp eq i32 %arg, 0
|
|
br i1 %cmp, label %bb, label %exit
|
|
|
|
bb:
|
|
; Multi-line asm: defines v7, then pads the block with ten v_nop_e64.
%var = call float asm sideeffect "v_mov_b32_e64 v7, -1
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64", "={v7}"()
|
|
%cmp.var = fcmp olt float %var, 0.0
|
|
; TODO: We could do an early-exit here (the branch above is uniform!)
|
|
call void @llvm.amdgcn.kill(i1 %cmp.var)
|
|
br label %exit
|
|
|
|
exit:
|
|
ret float 1.0
|
|
}
|
|
|
|
; Kill in one arm of a branch on an inreg argument, with work remaining after
; the kill. Block %bb defines the kill input in v7 via inline asm (a v_mov plus
; eleven v_nop_e64), defines a second asm value in v8 that is stored (volatile)
; after the kill, and defines a third asm value in v9 that reaches %exit through
; a phi and is stored there. The checks also pin the wait emitted between the
; volatile store and the following asm (s_waitcnt vmcnt(0) on tahiti,
; s_waitcnt_vscnt null, 0x0 on gfx1010 and gfx1100).
define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
|
|
; SI-LABEL: test_kill_control_flow_remainder:
|
|
; SI: ; %bb.0: ; %entry
|
|
; SI-NEXT: s_cmp_lg_u32 s0, 0
|
|
; SI-NEXT: v_mov_b32_e32 v9, 0
|
|
; SI-NEXT: s_cbranch_scc1 .LBB8_3
|
|
; SI-NEXT: ; %bb.1: ; %bb
|
|
; SI-NEXT: s_mov_b64 s[2:3], exec
|
|
; SI-NEXT: ;;#ASMSTART
|
|
; SI-NEXT: v_mov_b32_e64 v7, -1
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: ;;#ASMEND
|
|
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
|
|
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
|
|
; SI-NEXT: ;;#ASMSTART
|
|
; SI-NEXT: v_mov_b32_e64 v8, -1
|
|
; SI-NEXT: ;;#ASMEND
|
|
; SI-NEXT: s_cbranch_scc0 .LBB8_4
|
|
; SI-NEXT: ; %bb.2: ; %bb
|
|
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; SI-NEXT: s_mov_b32 s2, -1
|
|
; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0
|
|
; SI-NEXT: s_waitcnt vmcnt(0)
|
|
; SI-NEXT: ;;#ASMSTART
|
|
; SI-NEXT: v_mov_b32_e64 v9, -2
|
|
; SI-NEXT: ;;#ASMEND
|
|
; SI-NEXT: .LBB8_3: ; %exit
|
|
; SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; SI-NEXT: s_mov_b32 s2, -1
|
|
; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB8_4:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE64-LABEL: test_kill_control_flow_remainder:
|
|
; GFX10-WAVE64: ; %bb.0: ; %entry
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v9, 0
|
|
; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB8_2
|
|
; GFX10-WAVE64-NEXT: ; %bb.1: ; %exit
|
|
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v9, off
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
; GFX10-WAVE64-NEXT: .LBB8_2: ; %bb
|
|
; GFX10-WAVE64-NEXT: ;;#ASMSTART
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: ;;#ASMEND
|
|
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
|
|
; GFX10-WAVE64-NEXT: ;;#ASMSTART
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v8, -1
|
|
; GFX10-WAVE64-NEXT: ;;#ASMEND
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB8_4
|
|
; GFX10-WAVE64-NEXT: ; %bb.3: ; %bb
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v8, off
|
|
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WAVE64-NEXT: ;;#ASMSTART
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v9, -2
|
|
; GFX10-WAVE64-NEXT: ;;#ASMEND
|
|
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v9, off
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
; GFX10-WAVE64-NEXT: .LBB8_4:
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE32-LABEL: test_kill_control_flow_remainder:
|
|
; GFX10-WAVE32: ; %bb.0: ; %entry
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v9, 0
|
|
; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB8_2
|
|
; GFX10-WAVE32-NEXT: ; %bb.1: ; %exit
|
|
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v9, off
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
; GFX10-WAVE32-NEXT: .LBB8_2: ; %bb
|
|
; GFX10-WAVE32-NEXT: ;;#ASMSTART
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: ;;#ASMEND
|
|
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
|
|
; GFX10-WAVE32-NEXT: ;;#ASMSTART
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v8, -1
|
|
; GFX10-WAVE32-NEXT: ;;#ASMEND
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB8_4
|
|
; GFX10-WAVE32-NEXT: ; %bb.3: ; %bb
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
|
|
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v8, off
|
|
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WAVE32-NEXT: ;;#ASMSTART
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v9, -2
|
|
; GFX10-WAVE32-NEXT: ;;#ASMEND
|
|
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v9, off
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
; GFX10-WAVE32-NEXT: .LBB8_4:
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: test_kill_control_flow_remainder:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: v_mov_b32_e32 v9, 0
|
|
; GFX11-NEXT: s_cmp_lg_u32 s0, 0
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB8_2
|
|
; GFX11-NEXT: ; %bb.1: ; %exit
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v9, off
|
|
; GFX11-NEXT: s_nop 0
|
|
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB8_2: ; %bb
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: v_mov_b32_e64 v7, -1
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
|
|
; GFX11-NEXT: s_mov_b64 s[2:3], exec
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: v_mov_b32_e64 v8, -1
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB8_4
|
|
; GFX11-NEXT: ; %bb.3: ; %bb
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v8, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: v_mov_b32_e64 v9, -2
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v9, off
|
|
; GFX11-NEXT: s_nop 0
|
|
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB8_4:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0 off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
|
|
entry:
|
|
; Branch on the inreg argument: zero goes to %bb (the kill), anything else
; goes straight to %exit.
%cmp = icmp eq i32 %arg, 0
|
|
br i1 %cmp, label %bb, label %exit
|
|
|
|
bb:
|
|
; Multi-line asm: defines v7, then pads the block with eleven v_nop_e64.
%var = call float asm sideeffect "v_mov_b32_e64 v7, -1
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64", "={v7}"()
|
|
; Defined before the kill and consumed by the volatile store after it.
%live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
|
|
%cmp.var = fcmp olt float %var, 0.0
|
|
; TODO: We could do an early-exit here (the branch above is uniform!)
|
|
call void @llvm.amdgcn.kill(i1 %cmp.var)
|
|
store volatile float %live.across, ptr addrspace(1) undef
|
|
; Defined after the kill; reaches %exit through the phi below.
%live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
|
|
br label %exit
|
|
|
|
exit:
|
|
|
|
; 0.0 when arriving directly from %entry, the asm-defined v9 value from %bb.
%phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
|
|
store float %phi, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
|
|
; SI-LABEL: test_kill_control_flow_return:
|
|
; SI: ; %bb.0: ; %entry
|
|
; SI-NEXT: s_cmp_eq_u32 s0, 1
|
|
; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
|
|
; SI-NEXT: s_mov_b64 s[2:3], exec
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], exec
|
|
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
|
|
; SI-NEXT: s_cbranch_scc0 .LBB9_4
|
|
; SI-NEXT: ; %bb.1: ; %entry
|
|
; SI-NEXT: s_and_b64 exec, exec, s[2:3]
|
|
; SI-NEXT: s_cmp_lg_u32 s0, 0
|
|
; SI-NEXT: v_mov_b32_e32 v0, 0
|
|
; SI-NEXT: s_cbranch_scc0 .LBB9_3
|
|
; SI-NEXT: ; %bb.2: ; %exit
|
|
; SI-NEXT: s_branch .LBB9_5
|
|
; SI-NEXT: .LBB9_3: ; %bb
|
|
; SI-NEXT: ;;#ASMSTART
|
|
; SI-NEXT: v_mov_b32_e64 v7, -1
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: ;;#ASMEND
|
|
; SI-NEXT: v_mov_b32_e32 v0, v7
|
|
; SI-NEXT: s_branch .LBB9_5
|
|
; SI-NEXT: .LBB9_4:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB9_5:
|
|
;
|
|
; GFX10-WAVE64-LABEL: test_kill_control_flow_return:
|
|
; GFX10-WAVE64: ; %bb.0: ; %entry
|
|
; GFX10-WAVE64-NEXT: s_cmp_eq_u32 s0, 1
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
|
|
; GFX10-WAVE64-NEXT: s_cselect_b64 s[4:5], -1, 0
|
|
; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], s[4:5], exec
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB9_4
|
|
; GFX10-WAVE64-NEXT: ; %bb.1: ; %entry
|
|
; GFX10-WAVE64-NEXT: s_and_b64 exec, exec, s[2:3]
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB9_3
|
|
; GFX10-WAVE64-NEXT: ; %bb.2: ; %exit
|
|
; GFX10-WAVE64-NEXT: s_branch .LBB9_5
|
|
; GFX10-WAVE64-NEXT: .LBB9_3: ; %bb
|
|
; GFX10-WAVE64-NEXT: ;;#ASMSTART
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: ;;#ASMEND
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, v7
|
|
; GFX10-WAVE64-NEXT: s_branch .LBB9_5
|
|
; GFX10-WAVE64-NEXT: .LBB9_4:
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
; GFX10-WAVE64-NEXT: .LBB9_5:
|
|
;
|
|
; GFX10-WAVE32-LABEL: test_kill_control_flow_return:
|
|
; GFX10-WAVE32: ; %bb.0: ; %entry
|
|
; GFX10-WAVE32-NEXT: s_cmp_eq_u32 s0, 1
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
|
|
; GFX10-WAVE32-NEXT: s_cselect_b32 s2, -1, 0
|
|
; GFX10-WAVE32-NEXT: s_xor_b32 s2, s2, exec_lo
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, s2
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB9_4
|
|
; GFX10-WAVE32-NEXT: ; %bb.1: ; %entry
|
|
; GFX10-WAVE32-NEXT: s_and_b32 exec_lo, exec_lo, s1
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB9_3
|
|
; GFX10-WAVE32-NEXT: ; %bb.2: ; %exit
|
|
; GFX10-WAVE32-NEXT: s_branch .LBB9_5
|
|
; GFX10-WAVE32-NEXT: .LBB9_3: ; %bb
|
|
; GFX10-WAVE32-NEXT: ;;#ASMSTART
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: ;;#ASMEND
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, v7
|
|
; GFX10-WAVE32-NEXT: s_branch .LBB9_5
|
|
; GFX10-WAVE32-NEXT: .LBB9_4:
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
; GFX10-WAVE32-NEXT: .LBB9_5:
|
|
;
|
|
; GFX11-LABEL: test_kill_control_flow_return:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_cmp_eq_u32 s0, 1
|
|
; GFX11-NEXT: s_mov_b64 s[2:3], exec
|
|
; GFX11-NEXT: s_cselect_b64 s[4:5], -1, 0
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX11-NEXT: s_xor_b64 s[4:5], s[4:5], exec
|
|
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], s[4:5]
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB9_4
|
|
; GFX11-NEXT: ; %bb.1: ; %entry
|
|
; GFX11-NEXT: s_and_b64 exec, exec, s[2:3]
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX11-NEXT: s_cmp_lg_u32 s0, 0
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB9_3
|
|
; GFX11-NEXT: ; %bb.2: ; %exit
|
|
; GFX11-NEXT: s_branch .LBB9_5
|
|
; GFX11-NEXT: .LBB9_3: ; %bb
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: v_mov_b32_e64 v7, -1
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, v7
|
|
; GFX11-NEXT: s_branch .LBB9_5
|
|
; GFX11-NEXT: .LBB9_4:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0 off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB9_5:
|
|
entry:
|
|
%kill = icmp eq i32 %arg, 1
|
|
%cmp = icmp eq i32 %arg, 0
|
|
call void @llvm.amdgcn.kill(i1 %kill)
|
|
br i1 %cmp, label %bb, label %exit
|
|
|
|
bb:
|
|
%var = call float asm sideeffect "v_mov_b32_e64 v7, -1
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64", "={v7}"()
|
|
br label %exit
|
|
|
|
exit:
|
|
%ret = phi float [ %var, %bb ], [ 0.0, %entry ]
|
|
ret float %ret
|
|
}
|
|
|
|
; test_kill_divergent_loop - llvm.amdgcn.kill inside a loop that is entered on
; a condition computed from a non-inreg argument (compared in v0 in the
; assertions below).
;
; Shape of the IR that follows the assertions
;   entry - branches to %bb when %arg == 0, otherwise straight to %exit.
;   bb    - inline asm defines v7; the ordered compare (v7 < 0.0) is passed to
;           llvm.amdgcn.kill; a volatile load then decides whether to loop.
;   exit  - volatile store of 8, then return.
;
; The assertion lines are autogenerated (see the NOTE in the file header);
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
|
|
; SI-LABEL: test_kill_divergent_loop:
|
|
; SI: ; %bb.0: ; %entry
|
|
; SI-NEXT: s_mov_b64 s[0:1], exec
|
|
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
|
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
|
; SI-NEXT: s_xor_b64 s[4:5], exec, s[2:3]
|
|
; SI-NEXT: s_cbranch_execz .LBB10_4
|
|
; SI-NEXT: ; %bb.1: ; %bb.preheader
|
|
; SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; SI-NEXT: s_mov_b32 s2, -1
|
|
; SI-NEXT: .LBB10_2: ; %bb
|
|
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; SI-NEXT: ;;#ASMSTART
|
|
; SI-NEXT: v_mov_b32_e64 v7, -1
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: v_nop_e64
|
|
; SI-NEXT: ;;#ASMEND
|
|
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
|
|
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
|
|
; SI-NEXT: s_cbranch_scc0 .LBB10_5
|
|
; SI-NEXT: ; %bb.3: ; %bb
|
|
; SI-NEXT: ; in Loop: Header=BB10_2 Depth=1
|
|
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
|
|
; SI-NEXT: s_waitcnt vmcnt(0)
|
|
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
|
; SI-NEXT: s_cbranch_vccnz .LBB10_2
|
|
; SI-NEXT: .LBB10_4: ; %Flow1
|
|
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
|
|
; SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; SI-NEXT: s_mov_b32 s2, -1
|
|
; SI-NEXT: v_mov_b32_e32 v0, 8
|
|
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; SI-NEXT: s_waitcnt vmcnt(0)
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB10_5:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE64-LABEL: test_kill_divergent_loop:
|
|
; GFX10-WAVE64: ; %bb.0: ; %entry
|
|
; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
|
; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
|
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB10_3
|
|
; GFX10-WAVE64-NEXT: .LBB10_1: ; %bb
|
|
; GFX10-WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GFX10-WAVE64-NEXT: ;;#ASMSTART
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: v_nop_e64
|
|
; GFX10-WAVE64-NEXT: ;;#ASMEND
|
|
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB10_4
|
|
; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb
|
|
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB10_1 Depth=1
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; GFX10-WAVE64-NEXT: global_load_dword v0, v[0:1], off glc dlc
|
|
; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
|
; GFX10-WAVE64-NEXT: s_cbranch_vccnz .LBB10_1
|
|
; GFX10-WAVE64-NEXT: .LBB10_3: ; %Flow1
|
|
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[2:3]
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 8
|
|
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off
|
|
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
; GFX10-WAVE64-NEXT: .LBB10_4:
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE32-LABEL: test_kill_divergent_loop:
|
|
; GFX10-WAVE32: ; %bb.0: ; %entry
|
|
; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
|
|
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1
|
|
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB10_3
|
|
; GFX10-WAVE32-NEXT: .LBB10_1: ; %bb
|
|
; GFX10-WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GFX10-WAVE32-NEXT: ;;#ASMSTART
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: v_nop_e64
|
|
; GFX10-WAVE32-NEXT: ;;#ASMEND
|
|
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB10_4
|
|
; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb
|
|
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB10_1 Depth=1
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
|
|
; GFX10-WAVE32-NEXT: global_load_dword v0, v[0:1], off glc dlc
|
|
; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
|
; GFX10-WAVE32-NEXT: s_cbranch_vccnz .LBB10_1
|
|
; GFX10-WAVE32-NEXT: .LBB10_3: ; %Flow1
|
|
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s1
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 8
|
|
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off
|
|
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
; GFX10-WAVE32-NEXT: .LBB10_4:
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: test_kill_divergent_loop:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX11-NEXT: s_mov_b64 s[2:3], exec
|
|
; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0
|
|
; GFX11-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
|
; GFX11-NEXT: s_cbranch_execz .LBB10_3
|
|
; GFX11-NEXT: .LBB10_1: ; %bb
|
|
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GFX11-NEXT: ;;#ASMSTART
|
|
; GFX11-NEXT: v_mov_b32_e64 v7, -1
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: v_nop_e64
|
|
; GFX11-NEXT: ;;#ASMEND
|
|
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
|
|
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB10_4
|
|
; GFX11-NEXT: ; %bb.2: ; %bb
|
|
; GFX11-NEXT: ; in Loop: Header=BB10_1 Depth=1
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: global_load_b32 v0, v[0:1], off glc dlc
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
|
; GFX11-NEXT: s_cbranch_vccnz .LBB10_1
|
|
; GFX11-NEXT: .LBB10_3: ; %Flow1
|
|
; GFX11-NEXT: s_or_b64 exec, exec, s[2:3]
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, 8
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: s_nop 0
|
|
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB10_4:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0 off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
entry:
|
|
%cmp = icmp eq i32 %arg, 0
|
|
br i1 %cmp, label %bb, label %exit
|
|
|
|
bb:
|
|
%var = call float asm sideeffect "v_mov_b32_e64 v7, -1
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64
|
|
v_nop_e64", "={v7}"()
|
|
; Lanes for which %cmp.var is false are the ones the kill removes; the
; assertions show this as a v_cmp_ngt followed by an and-not on the live mask
; and on exec.
%cmp.var = fcmp olt float %var, 0.0
|
|
call void @llvm.amdgcn.kill(i1 %cmp.var)
|
|
; Volatile load through an undef pointer supplies the loop-back condition.
%vgpr = load volatile i32, ptr addrspace(1) undef
|
|
%loop.cond = icmp eq i32 %vgpr, 0
|
|
br i1 %loop.cond, label %bb, label %exit
|
|
|
|
exit:
|
|
store volatile i32 8, ptr addrspace(1) undef
|
|
ret void
|
|
}
|
|
|
|
; bug 28550
|
|
; phi_use_def_before_kill - %tmp2 is defined before the llvm.amdgcn.kill call
; in %bb and is still needed afterwards as an incoming value of the phi in
; %phibb.
;
; Shape of the IR that follows the assertions
;   bb    - computes %tmp2 = select(0.0 < %x + 1.0, -1.0, 0.0), passes
;           (%tmp2 < 0.0) to the kill, then branches on an undef condition.
;   bb8   - volatile store of 8, then falls into %phibb.
;   phibb - phi of %tmp2 (from %bb) and 4.0 (from %bb8); compares it with 0.0.
;   bb10  - volatile store of 9.
;   end   - return.
;
; The assertion lines are autogenerated (see the NOTE in the file header);
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
|
|
; SI-LABEL: phi_use_def_before_kill:
|
|
; SI: ; %bb.0: ; %bb
|
|
; SI-NEXT: v_add_f32_e64 v1, s0, 1.0
|
|
; SI-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
|
|
; SI-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
|
|
; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
|
|
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; SI-NEXT: s_cbranch_scc0 .LBB11_6
|
|
; SI-NEXT: ; %bb.1: ; %bb
|
|
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; SI-NEXT: s_cbranch_scc0 .LBB11_3
|
|
; SI-NEXT: ; %bb.2: ; %bb8
|
|
; SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; SI-NEXT: s_mov_b32 s2, -1
|
|
; SI-NEXT: v_mov_b32_e32 v0, 8
|
|
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
|
; SI-NEXT: v_mov_b32_e32 v0, 4.0
|
|
; SI-NEXT: .LBB11_3: ; %phibb
|
|
; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
|
|
; SI-NEXT: s_cbranch_vccz .LBB11_5
|
|
; SI-NEXT: ; %bb.4: ; %bb10
|
|
; SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; SI-NEXT: s_mov_b32 s2, -1
|
|
; SI-NEXT: v_mov_b32_e32 v0, 9
|
|
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; SI-NEXT: s_waitcnt vmcnt(0)
|
|
; SI-NEXT: .LBB11_5: ; %end
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB11_6:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE64-LABEL: phi_use_def_before_kill:
|
|
; GFX10-WAVE64: ; %bb.0: ; %bb
|
|
; GFX10-WAVE64-NEXT: v_add_f32_e64 v1, s0, 1.0
|
|
; GFX10-WAVE64-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
|
|
; GFX10-WAVE64-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
|
|
; GFX10-WAVE64-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_6
|
|
; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_3
|
|
; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb8
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v1, 8
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 4.0
|
|
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v1, off
|
|
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WAVE64-NEXT: .LBB11_3: ; %phibb
|
|
; GFX10-WAVE64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
|
|
; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB11_5
|
|
; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb10
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9
|
|
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off
|
|
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WAVE64-NEXT: .LBB11_5: ; %end
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
; GFX10-WAVE64-NEXT: .LBB11_6:
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE32-LABEL: phi_use_def_before_kill:
|
|
; GFX10-WAVE32: ; %bb.0: ; %bb
|
|
; GFX10-WAVE32-NEXT: v_add_f32_e64 v1, s0, 1.0
|
|
; GFX10-WAVE32-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0, v1
|
|
; GFX10-WAVE32-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
|
|
; GFX10-WAVE32-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0, v1
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_6
|
|
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_3
|
|
; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb8
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v1, 8
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 4.0
|
|
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v1, off
|
|
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WAVE32-NEXT: .LBB11_3: ; %phibb
|
|
; GFX10-WAVE32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
|
|
; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB11_5
|
|
; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb10
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9
|
|
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off
|
|
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WAVE32-NEXT: .LBB11_5: ; %end
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
; GFX10-WAVE32-NEXT: .LBB11_6:
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: phi_use_def_before_kill:
|
|
; GFX11: ; %bb.0: ; %bb
|
|
; GFX11-NEXT: v_add_f32_e64 v1, s0, 1.0
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
|
|
; GFX11-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB11_6
|
|
; GFX11-NEXT: ; %bb.1: ; %bb
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB11_3
|
|
; GFX11-NEXT: ; %bb.2: ; %bb8
|
|
; GFX11-NEXT: v_mov_b32_e32 v1, 8
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, 4.0
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: .LBB11_3: ; %phibb
|
|
; GFX11-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
|
|
; GFX11-NEXT: s_cbranch_vccz .LBB11_5
|
|
; GFX11-NEXT: ; %bb.4: ; %bb10
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, 9
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: .LBB11_5: ; %end
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB11_6:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0 off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
bb:
|
|
%tmp = fadd float %x, 1.000000e+00
|
|
%tmp1 = fcmp olt float 0.000000e+00, %tmp
|
|
%tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
|
|
; %tmp2 is defined here, before the kill, and consumed by the phi in %phibb.
%cmp.tmp2 = fcmp olt float %tmp2, 0.0
|
|
call void @llvm.amdgcn.kill(i1 %cmp.tmp2)
|
|
; The branch condition is undef, so either successor may be chosen.
br i1 undef, label %phibb, label %bb8
|
|
|
|
phibb:
|
|
; Merges the pre-kill value %tmp2 with the constant coming from %bb8.
%tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ]
|
|
%tmp6 = fcmp oeq float %tmp5, 0.000000e+00
|
|
br i1 %tmp6, label %bb10, label %end
|
|
|
|
bb8:
|
|
store volatile i32 8, ptr addrspace(1) undef
|
|
br label %phibb
|
|
|
|
bb10:
|
|
store volatile i32 9, ptr addrspace(1) undef
|
|
br label %end
|
|
|
|
end:
|
|
ret void
|
|
}
|
|
|
|
; no_skip_no_successors - the llvm.amdgcn.kill call sits in a block (%bb6)
; that ends in unreachable, i.e. a block with no successors.
;
; Shape of the IR that follows the assertions
;   bb  - branches to %bb6 when %arg1 is unordered-or-less-than 0.0, else %bb3.
;   bb3 - branches to %bb5 or %bb4 on a compare of %arg.
;   bb4 - constant-true branch to %bb5; the %bb7 edge is never taken.
;   bb5 - unreachable.
;   bb6 - kill with a constant false condition, then unreachable.
;   bb7 - return.
;
; The assertion lines are autogenerated (see the NOTE in the file header);
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
|
|
; SI-LABEL: no_skip_no_successors:
|
|
; SI: ; %bb.0: ; %bb
|
|
; SI-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0
|
|
; SI-NEXT: s_and_b64 vcc, exec, s[4:5]
|
|
; SI-NEXT: s_cbranch_vccz .LBB12_3
|
|
; SI-NEXT: ; %bb.1: ; %bb6
|
|
; SI-NEXT: s_mov_b64 s[2:3], exec
|
|
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
|
|
; SI-NEXT: s_cbranch_scc0 .LBB12_5
|
|
; SI-NEXT: ; %bb.2: ; %bb6
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: .LBB12_3: ; %bb3
|
|
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7ae148
|
|
; SI-NEXT: v_cmp_nge_f32_e32 vcc, s0, v0
|
|
; SI-NEXT: s_and_b64 vcc, exec, vcc
|
|
; SI-NEXT: ; %bb.4: ; %bb5
|
|
; SI-NEXT: .LBB12_5:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE64-LABEL: no_skip_no_successors:
|
|
; GFX10-WAVE64: ; %bb.0: ; %bb
|
|
; GFX10-WAVE64-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0
|
|
; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[4:5]
|
|
; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB12_3
|
|
; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb6
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB12_5
|
|
; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb6
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-WAVE64-NEXT: .LBB12_3: ; %bb3
|
|
; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0
|
|
; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[0:1]
|
|
; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb5
|
|
; GFX10-WAVE64-NEXT: .LBB12_5:
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE32-LABEL: no_skip_no_successors:
|
|
; GFX10-WAVE32: ; %bb.0: ; %bb
|
|
; GFX10-WAVE32-NEXT: v_cmp_nge_f32_e64 s1, s1, 0
|
|
; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s1
|
|
; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB12_3
|
|
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb6
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 s2, exec_lo
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 s2, s2, exec_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB12_5
|
|
; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb6
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: .LBB12_3: ; %bb3
|
|
; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e64 s0, 0x3e7ae148, s0
|
|
; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s0
|
|
; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb5
|
|
; GFX10-WAVE32-NEXT: .LBB12_5:
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: no_skip_no_successors:
|
|
; GFX11: ; %bb.0: ; %bb
|
|
; GFX11-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-NEXT: s_and_b64 vcc, exec, s[4:5]
|
|
; GFX11-NEXT: s_cbranch_vccz .LBB12_3
|
|
; GFX11-NEXT: ; %bb.1: ; %bb6
|
|
; GFX11-NEXT: s_mov_b64 s[2:3], exec
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], exec
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB12_5
|
|
; GFX11-NEXT: ; %bb.2: ; %bb6
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: .LBB12_3: ; %bb3
|
|
; GFX11-NEXT: v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0
|
|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
; GFX11-NEXT: s_and_b64 vcc, exec, s[0:1]
|
|
; GFX11-NEXT: ; %bb.4: ; %bb5
|
|
; GFX11-NEXT: .LBB12_5:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0 off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
bb:
|
|
%tmp = fcmp ult float %arg1, 0.000000e+00
|
|
br i1 %tmp, label %bb6, label %bb3
|
|
|
|
bb3: ; preds = %bb
|
|
%tmp2 = fcmp ult float %arg, 0x3FCF5C2900000000
|
|
br i1 %tmp2, label %bb5, label %bb4
|
|
|
|
bb4: ; preds = %bb3
|
|
; Constant-true condition, so the %bb7 edge (the only path to a return) is
; never taken.
br i1 true, label %bb5, label %bb7
|
|
|
|
bb5: ; preds = %bb4, %bb3
|
|
unreachable
|
|
|
|
bb6: ; preds = %bb
|
|
; Kill with a constant false condition, immediately followed by unreachable,
; which leaves this block without successors.
call void @llvm.amdgcn.kill(i1 false)
|
|
unreachable
|
|
|
|
bb7: ; preds = %bb4
|
|
ret void
|
|
}
|
|
|
|
; if_after_kill_block - a conditionally executed block (%bb3) contains the
; llvm.amdgcn.kill call, and the join block that follows it (%bb4) ends in a
; second conditional branch.
;
; Shape of the IR that follows the assertions
;   bb  - branches to %bb3 when %arg is... see %tmp (compare of %arg1 with 0.0),
;         otherwise straight to %bb4.
;   bb3 - passes (%arg < 0.0) to the kill, then joins %bb4.
;   bb4 - image sample, compares element 0 of the result with 0.0 and
;         branches to %bb8 or %bb9.
;   bb8 - volatile store of 9, then return.
;   bb9 - return.
;
; The assertion lines are autogenerated (see the NOTE in the file header);
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 {
|
|
; SI-LABEL: if_after_kill_block:
|
|
; SI: ; %bb.0: ; %bb
|
|
; SI-NEXT: s_mov_b64 s[0:1], exec
|
|
; SI-NEXT: s_wqm_b64 exec, exec
|
|
; SI-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1
|
|
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
|
; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
|
; SI-NEXT: s_cbranch_execz .LBB13_3
|
|
; SI-NEXT: ; %bb.1: ; %bb3
|
|
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
|
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
|
|
; SI-NEXT: s_cbranch_scc0 .LBB13_6
|
|
; SI-NEXT: ; %bb.2: ; %bb3
|
|
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; SI-NEXT: .LBB13_3: ; %bb4
|
|
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
|
|
; SI-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10
|
|
; SI-NEXT: s_waitcnt vmcnt(0)
|
|
; SI-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
|
|
; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
|
; SI-NEXT: s_cbranch_execz .LBB13_5
|
|
; SI-NEXT: ; %bb.4: ; %bb8
|
|
; SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; SI-NEXT: s_mov_b32 s2, -1
|
|
; SI-NEXT: v_mov_b32_e32 v0, 9
|
|
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; SI-NEXT: s_waitcnt vmcnt(0)
|
|
; SI-NEXT: .LBB13_5: ; %UnifiedReturnBlock
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB13_6:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE64-LABEL: if_after_kill_block:
|
|
; GFX10-WAVE64: ; %bb.0: ; %bb
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX10-WAVE64-NEXT: s_wqm_b64 exec, exec
|
|
; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1
|
|
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
|
; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
|
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB13_3
|
|
; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb3
|
|
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB13_6
|
|
; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb3
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; GFX10-WAVE64-NEXT: .LBB13_3: ; %bb4
|
|
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[2:3]
|
|
; GFX10-WAVE64-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
|
|
; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-WAVE64-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
|
|
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
|
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB13_5
|
|
; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb8
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9
|
|
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off
|
|
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WAVE64-NEXT: .LBB13_5: ; %UnifiedReturnBlock
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
; GFX10-WAVE64-NEXT: .LBB13_6:
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE32-LABEL: if_after_kill_block:
|
|
; GFX10-WAVE32: ; %bb.0: ; %bb
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
|
|
; GFX10-WAVE32-NEXT: s_wqm_b32 exec_lo, exec_lo
|
|
; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e32 vcc_lo, 0, v1
|
|
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1
|
|
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB13_3
|
|
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb3
|
|
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB13_6
|
|
; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb3
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
|
|
; GFX10-WAVE32-NEXT: .LBB13_3: ; %bb4
|
|
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s1
|
|
; GFX10-WAVE32-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
|
|
; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-WAVE32-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0
|
|
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB13_5
|
|
; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb8
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9
|
|
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off
|
|
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX10-WAVE32-NEXT: .LBB13_5: ; %UnifiedReturnBlock
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
; GFX10-WAVE32-NEXT: .LBB13_6:
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: if_after_kill_block:
|
|
; GFX11: ; %bb.0: ; %bb
|
|
; GFX11-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX11-NEXT: s_wqm_b64 exec, exec
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GFX11-NEXT: s_mov_b64 s[2:3], exec
|
|
; GFX11-NEXT: v_cmpx_nle_f32_e32 0, v1
|
|
; GFX11-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
|
; GFX11-NEXT: s_cbranch_execz .LBB13_3
|
|
; GFX11-NEXT: ; %bb.1: ; %bb3
|
|
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
|
|
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB13_6
|
|
; GFX11-NEXT: ; %bb.2: ; %bb3
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: .LBB13_3: ; %bb4
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GFX11-NEXT: s_or_b64 exec, exec, s[2:3]
|
|
; GFX11-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
|
|
; GFX11-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: v_cmpx_neq_f32_e32 0, v0
|
|
; GFX11-NEXT: s_cbranch_execz .LBB13_5
|
|
; GFX11-NEXT: ; %bb.4: ; %bb8
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, 9
|
|
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
|
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
|
; GFX11-NEXT: .LBB13_5: ; %UnifiedReturnBlock
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB13_6:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0 off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
bb:
|
|
%tmp = fcmp ult float %arg1, 0.000000e+00
|
|
br i1 %tmp, label %bb3, label %bb4
|
|
|
|
bb3: ; preds = %bb
|
|
; The kill is reached only when the branch in %bb selects this block.
%cmp.arg = fcmp olt float %arg, 0.0
|
|
call void @llvm.amdgcn.kill(i1 %cmp.arg)
|
|
br label %bb4
|
|
|
|
bb4: ; preds = %bb3, %bb
|
|
; Join point after the kill; the sampled value feeds the second branch.
%tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
|
%tmp6 = extractelement <4 x float> %tmp5, i32 0
|
|
%tmp7 = fcmp une float %tmp6, 0.000000e+00
|
|
br i1 %tmp7, label %bb8, label %bb9
|
|
|
|
bb8: ; preds = %bb4
|
|
store volatile i32 9, ptr addrspace(1) undef
|
|
ret void
|
|
|
|
bb9: ; preds = %bb4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
|
|
; SI-LABEL: cbranch_kill:
|
|
; SI: ; %bb.0: ; %.entry
|
|
; SI-NEXT: s_mov_b64 s[0:1], exec
|
|
; SI-NEXT: v_mov_b32_e32 v4, 0
|
|
; SI-NEXT: v_mov_b32_e32 v2, v1
|
|
; SI-NEXT: v_mov_b32_e32 v3, v1
|
|
; SI-NEXT: image_sample_l v1, v[1:4], s[0:7], s[0:3] dmask:0x1 da
|
|
; SI-NEXT: s_waitcnt vmcnt(0)
|
|
; SI-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1
|
|
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
|
; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
|
; SI-NEXT: s_cbranch_execz .LBB14_3
|
|
; SI-NEXT: ; %bb.1: ; %kill
|
|
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
|
; SI-NEXT: ; implicit-def: $vgpr0
|
|
; SI-NEXT: ; implicit-def: $vgpr1
|
|
; SI-NEXT: s_cbranch_scc0 .LBB14_6
|
|
; SI-NEXT: ; %bb.2: ; %kill
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: .LBB14_3: ; %Flow
|
|
; SI-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
|
|
; SI-NEXT: ; implicit-def: $vgpr2
|
|
; SI-NEXT: s_xor_b64 exec, exec, s[0:1]
|
|
; SI-NEXT: ; %bb.4: ; %live
|
|
; SI-NEXT: v_mul_f32_e32 v2, v0, v1
|
|
; SI-NEXT: ; %bb.5: ; %export
|
|
; SI-NEXT: s_or_b64 exec, exec, s[0:1]
|
|
; SI-NEXT: exp mrt0 v2, v2, v2, v2 done vm
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB14_6:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE64-LABEL: cbranch_kill:
|
|
; GFX10-WAVE64: ; %bb.0: ; %.entry
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, 0
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX10-WAVE64-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
|
|
; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-WAVE64-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1
|
|
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
|
; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
|
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB14_3
|
|
; GFX10-WAVE64-NEXT: ; %bb.1: ; %kill
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
|
; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr0
|
|
; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr1
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB14_6
|
|
; GFX10-WAVE64-NEXT: ; %bb.2: ; %kill
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-WAVE64-NEXT: .LBB14_3: ; %Flow
|
|
; GFX10-WAVE64-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
|
|
; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr2
|
|
; GFX10-WAVE64-NEXT: s_xor_b64 exec, exec, s[0:1]
|
|
; GFX10-WAVE64-NEXT: ; %bb.4: ; %live
|
|
; GFX10-WAVE64-NEXT: v_mul_f32_e32 v2, v0, v1
|
|
; GFX10-WAVE64-NEXT: ; %bb.5: ; %export
|
|
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[0:1]
|
|
; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v2, v2 done vm
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
; GFX10-WAVE64-NEXT: .LBB14_6:
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE32-LABEL: cbranch_kill:
|
|
; GFX10-WAVE32: ; %bb.0: ; %.entry
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, 0
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
|
|
; GFX10-WAVE32-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
|
|
; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-WAVE32-NEXT: v_cmp_ge_f32_e32 vcc_lo, 0, v1
|
|
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1
|
|
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB14_3
|
|
; GFX10-WAVE32-NEXT: ; %bb.1: ; %kill
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo
|
|
; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr0
|
|
; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr1
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB14_6
|
|
; GFX10-WAVE32-NEXT: ; %bb.2: ; %kill
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: .LBB14_3: ; %Flow
|
|
; GFX10-WAVE32-NEXT: s_or_saveexec_b32 s0, s1
|
|
; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr2
|
|
; GFX10-WAVE32-NEXT: s_xor_b32 exec_lo, exec_lo, s0
|
|
; GFX10-WAVE32-NEXT: ; %bb.4: ; %live
|
|
; GFX10-WAVE32-NEXT: v_mul_f32_e32 v2, v0, v1
|
|
; GFX10-WAVE32-NEXT: ; %bb.5: ; %export
|
|
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s0
|
|
; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v2, v2 done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
; GFX10-WAVE32-NEXT: .LBB14_6:
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: cbranch_kill:
|
|
; GFX11: ; %bb.0: ; %.entry
|
|
; GFX11-NEXT: v_mov_b32_e32 v2, 0
|
|
; GFX11-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX11-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
|
|
; GFX11-NEXT: s_mov_b64 s[2:3], exec
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: v_cmpx_ge_f32_e32 0, v1
|
|
; GFX11-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
|
; GFX11-NEXT: s_cbranch_execz .LBB14_3
|
|
; GFX11-NEXT: ; %bb.1: ; %kill
|
|
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], exec
|
|
; GFX11-NEXT: ; implicit-def: $vgpr0
|
|
; GFX11-NEXT: ; implicit-def: $vgpr1
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB14_6
|
|
; GFX11-NEXT: ; %bb.2: ; %kill
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: .LBB14_3: ; %Flow
|
|
; GFX11-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
|
|
; GFX11-NEXT: ; implicit-def: $vgpr2
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GFX11-NEXT: s_xor_b64 exec, exec, s[0:1]
|
|
; GFX11-NEXT: ; %bb.4: ; %live
|
|
; GFX11-NEXT: v_mul_f32_e32 v2, v0, v1
|
|
; GFX11-NEXT: ; %bb.5: ; %export
|
|
; GFX11-NEXT: s_or_b64 exec, exec, s[0:1]
|
|
; GFX11-NEXT: exp mrt0 v2, v2, v2, v2 done
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB14_6:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0 off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
.entry:
|
|
%sample = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, float %val1, float %val1, float %val1, float 0.000000e+00, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
|
|
%cond0 = fcmp ugt float %sample, 0.000000e+00
|
|
br i1 %cond0, label %live, label %kill
|
|
|
|
kill:
|
|
call void @llvm.amdgcn.kill(i1 false)
|
|
br label %export
|
|
|
|
live:
|
|
%scale = fmul reassoc nnan nsz arcp contract float %val0, %sample
|
|
br label %export
|
|
|
|
export:
|
|
%proxy = phi float [ undef, %kill ], [ %scale, %live ]
|
|
call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %proxy, float %proxy, float %proxy, float %proxy, i1 immarg true, i1 immarg true) #3
|
|
ret void
|
|
}
|
|
|
|
|
|
; Kill inside a loop body. %hdr either branches to %kill, which calls
; llvm.amdgcn.kill with a constant-false condition and then continues to
; %latch, or goes straight to %latch. %._crit_edge exports the final counter
; value, or -1 when %.entry bypasses the loop.
define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) {
|
|
; SI-LABEL: complex_loop:
|
|
; SI: ; %bb.0: ; %.entry
|
|
; SI-NEXT: s_cmp_lt_i32 s0, 1
|
|
; SI-NEXT: s_cbranch_scc1 .LBB15_7
|
|
; SI-NEXT: ; %bb.1: ; %.lr.ph
|
|
; SI-NEXT: s_mov_b64 s[2:3], exec
|
|
; SI-NEXT: s_mov_b32 s6, 0
|
|
; SI-NEXT: s_mov_b64 s[0:1], 0
|
|
; SI-NEXT: s_branch .LBB15_3
|
|
; SI-NEXT: .LBB15_2: ; %latch
|
|
; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
|
|
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
|
|
; SI-NEXT: s_add_i32 s6, s6, 1
|
|
; SI-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
|
|
; SI-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
|
|
; SI-NEXT: v_mov_b32_e32 v2, s6
|
|
; SI-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
|
; SI-NEXT: s_cbranch_execz .LBB15_6
|
|
; SI-NEXT: .LBB15_3: ; %hdr
|
|
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; SI-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
|
|
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
|
; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
|
|
; SI-NEXT: s_cbranch_execz .LBB15_2
|
|
; SI-NEXT: ; %bb.4: ; %kill
|
|
; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
|
|
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
|
|
; SI-NEXT: s_cbranch_scc0 .LBB15_8
|
|
; SI-NEXT: ; %bb.5: ; %kill
|
|
; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: s_branch .LBB15_2
|
|
; SI-NEXT: .LBB15_6: ; %Flow
|
|
; SI-NEXT: s_or_b64 exec, exec, s[0:1]
|
|
; SI-NEXT: exp mrt0 v2, v2, v0, v0 done vm
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB15_7:
|
|
; SI-NEXT: v_mov_b32_e32 v2, -1
|
|
; SI-NEXT: exp mrt0 v2, v2, v0, v0 done vm
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB15_8:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE64-LABEL: complex_loop:
|
|
; GFX10-WAVE64: ; %bb.0: ; %.entry
|
|
; GFX10-WAVE64-NEXT: s_cmp_lt_i32 s0, 1
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc1 .LBB15_7
|
|
; GFX10-WAVE64-NEXT: ; %bb.1: ; %.lr.ph
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
|
|
; GFX10-WAVE64-NEXT: s_mov_b32 s6, 0
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], 0
|
|
; GFX10-WAVE64-NEXT: s_branch .LBB15_3
|
|
; GFX10-WAVE64-NEXT: .LBB15_2: ; %latch
|
|
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
|
|
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
|
|
; GFX10-WAVE64-NEXT: s_add_i32 s6, s6, 1
|
|
; GFX10-WAVE64-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, s6
|
|
; GFX10-WAVE64-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
|
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB15_6
|
|
; GFX10-WAVE64-NEXT: .LBB15_3: ; %hdr
|
|
; GFX10-WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GFX10-WAVE64-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
|
|
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
|
; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
|
|
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB15_2
|
|
; GFX10-WAVE64-NEXT: ; %bb.4: ; %kill
|
|
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
|
|
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
|
|
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB15_8
|
|
; GFX10-WAVE64-NEXT: ; %bb.5: ; %kill
|
|
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-WAVE64-NEXT: s_branch .LBB15_2
|
|
; GFX10-WAVE64-NEXT: .LBB15_6: ; %Flow
|
|
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[0:1]
|
|
; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v0, v0 done vm
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
; GFX10-WAVE64-NEXT: .LBB15_7:
|
|
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, -1
|
|
; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v0, v0 done vm
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
; GFX10-WAVE64-NEXT: .LBB15_8:
|
|
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE64-NEXT: s_endpgm
|
|
;
|
|
; GFX10-WAVE32-LABEL: complex_loop:
|
|
; GFX10-WAVE32: ; %bb.0: ; %.entry
|
|
; GFX10-WAVE32-NEXT: s_cmp_lt_i32 s0, 1
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc1 .LBB15_7
|
|
; GFX10-WAVE32-NEXT: ; %bb.1: ; %.lr.ph
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 s0, 0
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 s2, 0
|
|
; GFX10-WAVE32-NEXT: s_branch .LBB15_3
|
|
; GFX10-WAVE32-NEXT: .LBB15_2: ; %latch
|
|
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
|
|
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s3
|
|
; GFX10-WAVE32-NEXT: s_add_i32 s2, s2, 1
|
|
; GFX10-WAVE32-NEXT: v_cmp_ge_i32_e32 vcc_lo, s2, v1
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, s2
|
|
; GFX10-WAVE32-NEXT: s_or_b32 s0, vcc_lo, s0
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
|
|
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB15_6
|
|
; GFX10-WAVE32-NEXT: .LBB15_3: ; %hdr
|
|
; GFX10-WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GFX10-WAVE32-NEXT: v_cmp_gt_u32_e32 vcc_lo, s2, v0
|
|
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s3, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_xor_b32 s3, exec_lo, s3
|
|
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB15_2
|
|
; GFX10-WAVE32-NEXT: ; %bb.4: ; %kill
|
|
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
|
|
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, exec_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB15_8
|
|
; GFX10-WAVE32-NEXT: ; %bb.5: ; %kill
|
|
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: s_branch .LBB15_2
|
|
; GFX10-WAVE32-NEXT: .LBB15_6: ; %Flow
|
|
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s0
|
|
; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v0, v0 done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
; GFX10-WAVE32-NEXT: .LBB15_7:
|
|
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, -1
|
|
; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v0, v0 done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
; GFX10-WAVE32-NEXT: .LBB15_8:
|
|
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
|
|
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
|
|
; GFX10-WAVE32-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: complex_loop:
|
|
; GFX11: ; %bb.0: ; %.entry
|
|
; GFX11-NEXT: s_cmp_lt_i32 s0, 1
|
|
; GFX11-NEXT: s_cbranch_scc1 .LBB15_7
|
|
; GFX11-NEXT: ; %bb.1: ; %.lr.ph
|
|
; GFX11-NEXT: s_mov_b64 s[2:3], exec
|
|
; GFX11-NEXT: s_mov_b32 s6, 0
|
|
; GFX11-NEXT: s_mov_b64 s[0:1], 0
|
|
; GFX11-NEXT: s_branch .LBB15_3
|
|
; GFX11-NEXT: .LBB15_2: ; %latch
|
|
; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1
|
|
; GFX11-NEXT: s_or_b64 exec, exec, s[4:5]
|
|
; GFX11-NEXT: s_add_i32 s6, s6, 1
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
|
|
; GFX11-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
|
|
; GFX11-NEXT: v_mov_b32_e32 v2, s6
|
|
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
|
|
; GFX11-NEXT: s_cbranch_execz .LBB15_6
|
|
; GFX11-NEXT: .LBB15_3: ; %hdr
|
|
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GFX11-NEXT: s_mov_b64 s[4:5], exec
|
|
; GFX11-NEXT: v_cmpx_gt_u32_e64 s6, v0
|
|
; GFX11-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
|
|
; GFX11-NEXT: s_cbranch_execz .LBB15_2
|
|
; GFX11-NEXT: ; %bb.4: ; %kill
|
|
; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1
|
|
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], exec
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB15_8
|
|
; GFX11-NEXT: ; %bb.5: ; %kill
|
|
; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: s_branch .LBB15_2
|
|
; GFX11-NEXT: .LBB15_6: ; %Flow
|
|
; GFX11-NEXT: s_or_b64 exec, exec, s[0:1]
|
|
; GFX11-NEXT: exp mrt0 v2, v2, v0, v0 done
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB15_7:
|
|
; GFX11-NEXT: v_mov_b32_e32 v2, -1
|
|
; GFX11-NEXT: exp mrt0 v2, v2, v0, v0 done
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB15_8:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0 off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
.entry:
|
|
%flaga = icmp sgt i32 %cmpa, 0
|
|
br i1 %flaga, label %.lr.ph, label %._crit_edge
|
|
|
|
.lr.ph:
|
|
br label %hdr
|
|
|
|
hdr:
|
|
%ctr = phi i32 [ 0, %.lr.ph ], [ %ctr.next, %latch ]
|
|
%flagb = icmp ugt i32 %ctr, %cmpb
|
|
br i1 %flagb, label %kill, label %latch
|
|
|
|
kill:
|
|
; The kill condition is the constant false. In the expected output above this
; clears the currently active lanes from a copy of exec taken in %.lr.ph.
call void @llvm.amdgcn.kill(i1 false)
|
|
br label %latch
|
|
|
|
latch:
|
|
%ctr.next = add nuw nsw i32 %ctr, 1
|
|
%flagc = icmp slt i32 %ctr.next, %cmpc
|
|
br i1 %flagc, label %hdr, label %._crit_edge
|
|
|
|
._crit_edge:
|
|
; -1 when %.entry branches here directly, otherwise the last %ctr.next.
%tmp = phi i32 [ -1, %.entry ], [ %ctr.next, %latch ]
|
|
%out = bitcast i32 %tmp to float
|
|
call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %out, float %out, float undef, float undef, i1 immarg true, i1 immarg true)
|
|
ret void
|
|
}
|
|
|
|
; Conditional llvm.amdgcn.s.setreg call in a function that uses the default
; calling convention (no amdgpu_ps) and contains no kill. %entry branches to
; %bb.0, which performs the setreg, or directly to %bb.1; the expected output
; keeps an s_cbranch_execz around the s_setreg_imm32_b32 instruction.
define void @skip_mode_switch(i32 %arg) {
|
|
; WAVE64-LABEL: skip_mode_switch:
|
|
; WAVE64: ; %bb.0: ; %entry
|
|
; WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
|
; WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
|
; WAVE64-NEXT: s_cbranch_execz .LBB16_2
|
|
; WAVE64-NEXT: ; %bb.1: ; %bb.0
|
|
; WAVE64-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
|
|
; WAVE64-NEXT: .LBB16_2: ; %bb.1
|
|
; WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
|
|
; WAVE64-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10-WAVE32-LABEL: skip_mode_switch:
|
|
; GFX10-WAVE32: ; %bb.0: ; %entry
|
|
; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
|
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s4, vcc_lo
|
|
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB16_2
|
|
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb.0
|
|
; GFX10-WAVE32-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
|
|
; GFX10-WAVE32-NEXT: .LBB16_2: ; %bb.1
|
|
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s4
|
|
; GFX10-WAVE32-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX11-LABEL: skip_mode_switch:
|
|
; GFX11: ; %bb.0: ; %entry
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0
|
|
; GFX11-NEXT: s_cbranch_execz .LBB16_2
|
|
; GFX11-NEXT: ; %bb.1: ; %bb.0
|
|
; GFX11-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
|
|
; GFX11-NEXT: .LBB16_2: ; %bb.1
|
|
; GFX11-NEXT: s_or_b64 exec, exec, s[0:1]
|
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
|
entry:
|
|
%cmp = icmp eq i32 %arg, 0
|
|
br i1 %cmp, label %bb.0, label %bb.1
|
|
|
|
bb.0:
|
|
; The immediate 2049 is printed as hwreg(HW_REG_MODE, 0, 2) in the expected
; output above, and the value 3 becomes the s_setreg_imm32_b32 operand.
call void @llvm.amdgcn.s.setreg(i32 2049, i32 3)
|
|
br label %bb.1
|
|
|
|
bb.1:
|
|
ret void
|
|
}
|
|
|
|
; Intrinsic declarations for the test functions in this file, followed by the
; attribute groups referenced through the #N suffixes.
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3
|
|
declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
|
|
; NOTE(review): no call to the sample.c.1d intrinsic below appears in the last
; three test functions; presumably an earlier test uses it - confirm.
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
|
|
declare void @llvm.amdgcn.kill(i1) #0
|
|
|
|
declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)
|
|
|
|
attributes #0 = { nounwind }
|
|
attributes #1 = { nounwind readonly }
|
|
; NOTE(review): none of the declarations above reference #2; verify that a
; function earlier in the file still uses it.
attributes #2 = { nounwind readnone speculatable }
|
|
attributes #3 = { inaccessiblememonly nounwind writeonly }
|