Files
clang-p2996/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
Brendon Cahoon b32a5666a8 [AMDGPU] Unify uniform return and divergent unreachable blocks
This patch fixes a "failed to annotate CFG" error in
SIAnnotateControlFlow. The problem occurs when there are
divergent and uniform unreachable/return blocks in the same
region. In this case, AMDGPUUnifyDivergentExitNodes does not
create a unified block so the region contains multiple exits.

StructurizeCFG does not work properly when there are multiple
exits, so the necessary CFG transformations do not occur along
divergent control flow. Subsequently, SIAnnotateControlFlow
processes the path to the divergent exit block, but may only
partially process blocks along a uniform control flow path to
another exit block.

This patch fixes the bug by creating a single exit block when
there is a divergent exit block in the function.

Differential revision: https://reviews.llvm.org/D136892
2022-11-29 13:25:56 -06:00

1987 lines
74 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,SI %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,GFX10-WAVE64 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX10-WAVE32 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; Kill with a constant-true condition in a pixel shader.
; The assertions below expect no kill sequence at all: the only
; instruction checked in the function body is the program end.
define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
; GCN-LABEL: test_kill_depth_0_imm_pos:
; GCN: ; %bb.0:
; GCN-NEXT: s_endpgm
call void @llvm.amdgcn.kill(i1 true)
ret void
}
; Kill with a constant-false condition in a pixel shader.
; The assertions expect an and-not of exec with itself, then a branch on
; scc0 to a separate early-exit block. That block zeroes exec, issues a
; "done" export and ends the program. The export operands in the checks
; differ between the gfx1100 run (mrt0 ... done) and the other runs
; (null ... done vm).
define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
; WAVE64-LABEL: test_kill_depth_0_imm_neg:
; WAVE64: ; %bb.0:
; WAVE64-NEXT: s_andn2_b64 exec, exec, exec
; WAVE64-NEXT: s_cbranch_scc0 .LBB1_1
; WAVE64-NEXT: s_endpgm
; WAVE64-NEXT: .LBB1_1:
; WAVE64-NEXT: s_mov_b64 exec, 0
; WAVE64-NEXT: exp null off, off, off, off done vm
; WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, exec_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB1_1
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB1_1:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: test_kill_depth_0_imm_neg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b64 exec, exec, exec
; GFX11-NEXT: s_cbranch_scc0 .LBB1_1
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB1_1:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.kill(i1 false)
ret void
}
; Two back-to-back kills, both with a constant-false condition.
; The assertions expect a copy of exec to be kept in scalar registers and
; two and-not/branch pairs, both branching to the same early-exit block.
; FIXME: Ideally only one early-exit would be emitted
define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
; WAVE64-LABEL: test_kill_depth_0_imm_neg_x2:
; WAVE64: ; %bb.0:
; WAVE64-NEXT: s_mov_b64 s[0:1], exec
; WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; WAVE64-NEXT: s_cbranch_scc0 .LBB2_2
; WAVE64-NEXT: ; %bb.1:
; WAVE64-NEXT: s_mov_b64 exec, 0
; WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; WAVE64-NEXT: s_cbranch_scc0 .LBB2_2
; WAVE64-NEXT: s_endpgm
; WAVE64-NEXT: .LBB2_2:
; WAVE64-NEXT: s_mov_b64 exec, 0
; WAVE64-NEXT: exp null off, off, off, off done vm
; WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg_x2:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB2_2
; GFX10-WAVE32-NEXT: ; %bb.1:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB2_2
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB2_2:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: test_kill_depth_0_imm_neg_x2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_mov_b64 s[0:1], exec
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], exec
; GFX11-NEXT: s_cbranch_scc0 .LBB2_2
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], exec
; GFX11-NEXT: s_cbranch_scc0 .LBB2_2
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB2_2:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.kill(i1 false)
call void @llvm.amdgcn.kill(i1 false)
ret void
}
; Single kill on a per-lane condition: the ordered less-than compare of the
; float argument %x against 0.0.
; The assertions expect one vector compare into vcc, an and-not of exec with
; vcc, and a branch on scc0 to the early-exit block.
define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
; WAVE64-LABEL: test_kill_depth_var:
; WAVE64: ; %bb.0:
; WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; WAVE64-NEXT: s_cbranch_scc0 .LBB3_1
; WAVE64-NEXT: s_endpgm
; WAVE64-NEXT: .LBB3_1:
; WAVE64-NEXT: s_mov_b64 exec, 0
; WAVE64-NEXT: exp null off, off, off, off done vm
; WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB3_1
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB3_1:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: test_kill_depth_var:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB3_1
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB3_1:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
%cmp = fcmp olt float %x, 0.0
call void @llvm.amdgcn.kill(i1 %cmp)
ret void
}
; Two kills that both use the same comparison result %cmp.
; The assertions expect the vector compare to appear ahead of each of the two
; and-not/branch pairs, and both branches to target one shared exit block.
; FIXME: Ideally only one early-exit would be emitted
define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
; SI-LABEL: test_kill_depth_var_x2_same:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB4_2
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB4_2
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB4_2:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_same:
; GFX10-WAVE64: ; %bb.0:
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB4_2
; GFX10-WAVE64-NEXT: ; %bb.1:
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB4_2
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB4_2:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_same:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB4_2
; GFX10-WAVE32-NEXT: ; %bb.1:
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB4_2
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB4_2:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: test_kill_depth_var_x2_same:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_mov_b64 s[0:1], exec
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB4_2
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB4_2
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB4_2:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
%cmp = fcmp olt float %x, 0.0
; Both kills below consume the very same i1 value.
call void @llvm.amdgcn.kill(i1 %cmp)
call void @llvm.amdgcn.kill(i1 %cmp)
ret void
}
; Two kills on two independent conditions, one derived from %x and one from
; %y (each an ordered less-than compare against 0.0).
; The assertions expect one compare per argument register (v0, then v1), each
; followed by an and-not/branch pair that targets the same exit block.
; FIXME: Ideally only one early-exit would be emitted
define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
; SI-LABEL: test_kill_depth_var_x2:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB5_2
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB5_2
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB5_2:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2:
; GFX10-WAVE64: ; %bb.0:
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB5_2
; GFX10-WAVE64-NEXT: ; %bb.1:
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB5_2
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB5_2:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB5_2
; GFX10-WAVE32-NEXT: ; %bb.1:
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB5_2
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB5_2:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: test_kill_depth_var_x2:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_mov_b64 s[0:1], exec
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB5_2
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB5_2
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB5_2:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
%cmp.x = fcmp olt float %x, 0.0
call void @llvm.amdgcn.kill(i1 %cmp.x)
%cmp.y = fcmp olt float %y, 0.0
call void @llvm.amdgcn.kill(i1 %cmp.y)
ret void
}
; Two kills separated by a side-effecting inline-asm statement.
; The first kill tests %x; the second tests a value that the inline asm
; produces in v7. The assertions expect the asm to appear between the two
; and-not/branch pairs, and the second compare to read v7.
define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
; SI-LABEL: test_kill_depth_var_x2_instructions:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB6_2
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v7, -1
; SI-NEXT: ;;#ASMEND
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB6_2
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB6_2:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_instructions:
; GFX10-WAVE64: ; %bb.0:
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB6_2
; GFX10-WAVE64-NEXT: ; %bb.1:
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB6_2
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB6_2:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_instructions:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB6_2
; GFX10-WAVE32-NEXT: ; %bb.1:
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB6_2
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB6_2:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: test_kill_depth_var_x2_instructions:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_mov_b64 s[0:1], exec
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB6_2
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: v_mov_b32_e64 v7, -1
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB6_2
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB6_2:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
%cmp.x = fcmp olt float %x, 0.0
call void @llvm.amdgcn.kill(i1 %cmp.x)
; The asm output is constrained to v7 and is the operand of the second kill's
; compare, so it has to stay between the two kills.
%y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
%cmp.y = fcmp olt float %y, 0.0
call void @llvm.amdgcn.kill(i1 %cmp.y)
ret void
}
; Kill inside a conditionally executed block.
; %entry branches on the inreg argument being zero. %bb produces a value in
; v7 through a long inline-asm sequence (one move plus ten nops), kills on
; that value, and then joins %exit, which returns 1.0.
; The assertions expect a scalar compare/branch for the entry condition and
; an and-not/branch pair in %bb that targets a separate early-exit block.
; FIXME: why does the skip depend on the asm length in the same block?
define amdgpu_ps float @test_kill_control_flow(i32 inreg %arg) #0 {
; SI-LABEL: test_kill_control_flow:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_cmp_lg_u32 s0, 0
; SI-NEXT: s_cbranch_scc0 .LBB7_2
; SI-NEXT: ; %bb.1: ; %exit
; SI-NEXT: v_mov_b32_e32 v0, 1.0
; SI-NEXT: s_branch .LBB7_5
; SI-NEXT: .LBB7_2: ; %bb
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v7, -1
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: ;;#ASMEND
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; SI-NEXT: s_cbranch_scc0 .LBB7_4
; SI-NEXT: ; %bb.3: ; %bb
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: v_mov_b32_e32 v0, 1.0
; SI-NEXT: s_branch .LBB7_5
; SI-NEXT: .LBB7_4:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB7_5:
;
; GFX10-WAVE64-LABEL: test_kill_control_flow:
; GFX10-WAVE64: ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB7_2
; GFX10-WAVE64-NEXT: ; %bb.1: ; %exit
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 1.0
; GFX10-WAVE64-NEXT: s_branch .LBB7_5
; GFX10-WAVE64-NEXT: .LBB7_2: ; %bb
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB7_4
; GFX10-WAVE64-NEXT: ; %bb.3: ; %bb
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 1.0
; GFX10-WAVE64-NEXT: s_branch .LBB7_5
; GFX10-WAVE64-NEXT: .LBB7_4:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB7_5:
;
; GFX10-WAVE32-LABEL: test_kill_control_flow:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB7_2
; GFX10-WAVE32-NEXT: ; %bb.1: ; %exit
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 1.0
; GFX10-WAVE32-NEXT: s_branch .LBB7_5
; GFX10-WAVE32-NEXT: .LBB7_2: ; %bb
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB7_4
; GFX10-WAVE32-NEXT: ; %bb.3: ; %bb
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 1.0
; GFX10-WAVE32-NEXT: s_branch .LBB7_5
; GFX10-WAVE32-NEXT: .LBB7_4:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB7_5:
;
; GFX11-LABEL: test_kill_control_flow:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_cmp_lg_u32 s0, 0
; GFX11-NEXT: s_cbranch_scc0 .LBB7_2
; GFX11-NEXT: ; %bb.1: ; %exit
; GFX11-NEXT: v_mov_b32_e32 v0, 1.0
; GFX11-NEXT: s_branch .LBB7_5
; GFX11-NEXT: .LBB7_2: ; %bb
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: v_mov_b32_e64 v7, -1
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB7_4
; GFX11-NEXT: ; %bb.3: ; %bb
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: v_mov_b32_e32 v0, 1.0
; GFX11-NEXT: s_branch .LBB7_5
; GFX11-NEXT: .LBB7_4:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB7_5:
entry:
%cmp = icmp eq i32 %arg, 0
br i1 %cmp, label %bb, label %exit
bb:
; The asm body is deliberately long (see the FIXME on this function); its
; result is pinned to v7 and feeds the kill condition below.
%var = call float asm sideeffect "v_mov_b32_e64 v7, -1
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64", "={v7}"()
%cmp.var = fcmp olt float %var, 0.0
; TODO: We could do an early-exit here (the branch above is uniform!)
call void @llvm.amdgcn.kill(i1 %cmp.var)
br label %exit
exit:
ret float 1.0
}
; Kill inside a conditionally executed block that still has work to do after
; the kill.
; %bb defines two asm values before the kill: %var (v7, the kill operand) and
; %live.across (v8). After the kill it stores %live.across with a volatile
; store and defines %live.out (v9), which reaches %exit through a phi and is
; stored there.
; The assertions expect the v8 store and the v9 asm to be placed after the
; and-not of exec, i.e. on the path that survives the kill.
define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
; SI-LABEL: test_kill_control_flow_remainder:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_cmp_lg_u32 s0, 0
; SI-NEXT: v_mov_b32_e32 v9, 0
; SI-NEXT: s_cbranch_scc1 .LBB8_3
; SI-NEXT: ; %bb.1: ; %bb
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v7, -1
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: ;;#ASMEND
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v8, -1
; SI-NEXT: ;;#ASMEND
; SI-NEXT: s_cbranch_scc0 .LBB8_4
; SI-NEXT: ; %bb.2: ; %bb
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v9, -2
; SI-NEXT: ;;#ASMEND
; SI-NEXT: .LBB8_3: ; %exit
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB8_4:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_control_flow_remainder:
; GFX10-WAVE64: ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v9, 0
; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB8_2
; GFX10-WAVE64-NEXT: ; %bb.1: ; %exit
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v9, off
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB8_2: ; %bb
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v8, -1
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB8_4
; GFX10-WAVE64-NEXT: ; %bb.3: ; %bb
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v8, off
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v9, -2
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v9, off
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB8_4:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_control_flow_remainder:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v9, 0
; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB8_2
; GFX10-WAVE32-NEXT: ; %bb.1: ; %exit
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v9, off
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB8_2: ; %bb
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v8, -1
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB8_4
; GFX10-WAVE32-NEXT: ; %bb.3: ; %bb
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v8, off
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v9, -2
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v9, off
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB8_4:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: test_kill_control_flow_remainder:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: v_mov_b32_e32 v9, 0
; GFX11-NEXT: s_cmp_lg_u32 s0, 0
; GFX11-NEXT: s_cbranch_scc0 .LBB8_2
; GFX11-NEXT: ; %bb.1: ; %exit
; GFX11-NEXT: global_store_b32 v[0:1], v9, off
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB8_2: ; %bb
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: v_mov_b32_e64 v7, -1
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: v_mov_b32_e64 v8, -1
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB8_4
; GFX11-NEXT: ; %bb.3: ; %bb
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: global_store_b32 v[0:1], v8, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: v_mov_b32_e64 v9, -2
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: global_store_b32 v[0:1], v9, off
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB8_4:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
entry:
%cmp = icmp eq i32 %arg, 0
br i1 %cmp, label %bb, label %exit
bb:
; Kill operand: one move into v7 followed by eleven nops.
%var = call float asm sideeffect "v_mov_b32_e64 v7, -1
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64", "={v7}"()
; Defined before the kill, used after it.
%live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
%cmp.var = fcmp olt float %var, 0.0
; TODO: We could do an early-exit here (the branch above is uniform!)
call void @llvm.amdgcn.kill(i1 %cmp.var)
store volatile float %live.across, float addrspace(1)* undef
; Defined after the kill; leaves the block through the phi in %exit.
%live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
br label %exit
exit:
%phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
store float %phi, float addrspace(1)* undef
ret void
}
; Kill in the entry block, ahead of a conditional branch, in a shader that
; returns a value.
; %entry kills on (%arg == 1) and then branches on (%arg == 0). %bb produces
; a value in v7 through inline asm, and %exit returns either that value or
; 0.0 via a phi.
; The assertions expect the kill mask to be built with scalar instructions
; (select, xor with exec, and-not) and the early-exit branch to come before
; the compare/branch for the %bb/%exit split.
define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
; SI-LABEL: test_kill_control_flow_return:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_cmp_eq_u32 s0, 1
; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], exec
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
; SI-NEXT: s_cbranch_scc0 .LBB9_4
; SI-NEXT: ; %bb.1: ; %entry
; SI-NEXT: s_and_b64 exec, exec, s[2:3]
; SI-NEXT: s_cmp_lg_u32 s0, 0
; SI-NEXT: v_mov_b32_e32 v0, 0
; SI-NEXT: s_cbranch_scc0 .LBB9_3
; SI-NEXT: ; %bb.2: ; %exit
; SI-NEXT: s_branch .LBB9_5
; SI-NEXT: .LBB9_3: ; %bb
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v7, -1
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: ;;#ASMEND
; SI-NEXT: v_mov_b32_e32 v0, v7
; SI-NEXT: s_branch .LBB9_5
; SI-NEXT: .LBB9_4:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB9_5:
;
; GFX10-WAVE64-LABEL: test_kill_control_flow_return:
; GFX10-WAVE64: ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT: s_cmp_eq_u32 s0, 1
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: s_cselect_b64 s[4:5], -1, 0
; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], s[4:5], exec
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB9_4
; GFX10-WAVE64-NEXT: ; %bb.1: ; %entry
; GFX10-WAVE64-NEXT: s_and_b64 exec, exec, s[2:3]
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB9_3
; GFX10-WAVE64-NEXT: ; %bb.2: ; %exit
; GFX10-WAVE64-NEXT: s_branch .LBB9_5
; GFX10-WAVE64-NEXT: .LBB9_3: ; %bb
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, v7
; GFX10-WAVE64-NEXT: s_branch .LBB9_5
; GFX10-WAVE64-NEXT: .LBB9_4:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB9_5:
;
; GFX10-WAVE32-LABEL: test_kill_control_flow_return:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: s_cmp_eq_u32 s0, 1
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT: s_cselect_b32 s2, -1, 0
; GFX10-WAVE32-NEXT: s_xor_b32 s2, s2, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, s2
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB9_4
; GFX10-WAVE32-NEXT: ; %bb.1: ; %entry
; GFX10-WAVE32-NEXT: s_and_b32 exec_lo, exec_lo, s1
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB9_3
; GFX10-WAVE32-NEXT: ; %bb.2: ; %exit
; GFX10-WAVE32-NEXT: s_branch .LBB9_5
; GFX10-WAVE32-NEXT: .LBB9_3: ; %bb
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, v7
; GFX10-WAVE32-NEXT: s_branch .LBB9_5
; GFX10-WAVE32-NEXT: .LBB9_4:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB9_5:
;
; GFX11-LABEL: test_kill_control_flow_return:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_cmp_eq_u32 s0, 1
; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: s_cselect_b64 s[4:5], -1, 0
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_xor_b64 s[4:5], s[4:5], exec
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], s[4:5]
; GFX11-NEXT: s_cbranch_scc0 .LBB9_4
; GFX11-NEXT: ; %bb.1: ; %entry
; GFX11-NEXT: s_and_b64 exec, exec, s[2:3]
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_cmp_lg_u32 s0, 0
; GFX11-NEXT: s_cbranch_scc0 .LBB9_3
; GFX11-NEXT: ; %bb.2: ; %exit
; GFX11-NEXT: s_branch .LBB9_5
; GFX11-NEXT: .LBB9_3: ; %bb
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: v_mov_b32_e64 v7, -1
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_mov_b32_e32 v0, v7
; GFX11-NEXT: s_branch .LBB9_5
; GFX11-NEXT: .LBB9_4:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB9_5:
entry:
; Both conditions come from the same inreg argument; the kill is issued
; before the branch.
%kill = icmp eq i32 %arg, 1
%cmp = icmp eq i32 %arg, 0
call void @llvm.amdgcn.kill(i1 %kill)
br i1 %cmp, label %bb, label %exit
bb:
; One move into v7 followed by ten nops; the result is the value returned
; when control reaches %exit from this block.
%var = call float asm sideeffect "v_mov_b32_e64 v7, -1
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64", "={v7}"()
br label %exit
exit:
%ret = phi float [ %var, %bb ], [ 0.0, %entry ]
ret float %ret
}
; A conditional kill inside a loop that is entered through a branch on the
; VGPR argument %arg (compared in v0 in the expected output).
; For every run line the expected code keeps a copy of the initial exec mask,
; removes the killed lanes from that copy with an and-not, and uses
; s_cbranch_scc0 to reach a trailing block that clears exec, issues an export
; with all channels off, and ends the program. Otherwise the same lanes are
; removed from exec and the loop continues.
define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
; SI-LABEL: test_kill_divergent_loop:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT: s_xor_b64 s[4:5], exec, s[2:3]
; SI-NEXT: s_cbranch_execz .LBB10_4
; SI-NEXT: ; %bb.1: ; %bb.preheader
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: .LBB10_2: ; %bb
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v7, -1
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: ;;#ASMEND
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB10_5
; SI-NEXT: ; %bb.3: ; %bb
; SI-NEXT: ; in Loop: Header=BB10_2 Depth=1
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT: s_cbranch_vccnz .LBB10_2
; SI-NEXT: .LBB10_4: ; %Flow1
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 8
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB10_5:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_divergent_loop:
; GFX10-WAVE64: ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB10_3
; GFX10-WAVE64-NEXT: .LBB10_1: ; %bb
; GFX10-WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB10_4
; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB10_1 Depth=1
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: global_load_dword v0, v[0:1], off glc dlc
; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0)
; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_cbranch_vccnz .LBB10_1
; GFX10-WAVE64-NEXT: .LBB10_3: ; %Flow1
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 8
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB10_4:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_divergent_loop:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB10_3
; GFX10-WAVE32-NEXT: .LBB10_1: ; %bb
; GFX10-WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB10_4
; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB10_1 Depth=1
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: global_load_dword v0, v[0:1], off glc dlc
; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0)
; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_cbranch_vccnz .LBB10_1
; GFX10-WAVE32-NEXT: .LBB10_3: ; %Flow1
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 8
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB10_4:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: test_kill_divergent_loop:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_mov_b64 s[0:1], exec
; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX11-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; GFX11-NEXT: s_cbranch_execz .LBB10_3
; GFX11-NEXT: .LBB10_1: ; %bb
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: v_mov_b32_e64 v7, -1
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB10_4
; GFX11-NEXT: ; %bb.2: ; %bb
; GFX11-NEXT: ; in Loop: Header=BB10_1 Depth=1
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: global_load_b32 v0, v[0:1], off glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX11-NEXT: s_cbranch_vccnz .LBB10_1
; GFX11-NEXT: .LBB10_3: ; %Flow1
; GFX11-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX11-NEXT: v_mov_b32_e32 v0, 8
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB10_4:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
entry:
%cmp = icmp eq i32 %arg, 0
br i1 %cmp, label %bb, label %exit
bb:
%var = call float asm sideeffect "v_mov_b32_e64 v7, -1
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64", "={v7}"()
; Lanes for which %cmp.var is false are the ones removed by the kill
; (the expected output masks with the inverted compare, v_cmp_ngt).
%cmp.var = fcmp olt float %var, 0.0
call void @llvm.amdgcn.kill(i1 %cmp.var)
; The loop back-edge depends on a volatile load, so the trip count is not
; known at compile time.
%vgpr = load volatile i32, i32 addrspace(1)* undef
%loop.cond = icmp eq i32 %vgpr, 0
br i1 %loop.cond, label %bb, label %exit
exit:
store volatile i32 8, i32 addrspace(1)* undef
ret void
}
; bug 28550
; %tmp2 is defined before the kill in %bb and is consumed afterwards by the
; phi in %phibb. The expected output materialises %tmp2 with v_cndmask into
; v0 before exec is modified for the kill, and %phibb then compares v0
; (or the constant 4.0 written on the %bb8 path) against zero.
define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
; SI-LABEL: phi_use_def_before_kill:
; SI: ; %bb.0: ; %bb
; SI-NEXT: v_add_f32_e64 v1, s0, 1.0
; SI-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; SI-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_cbranch_scc0 .LBB11_6
; SI-NEXT: ; %bb.1: ; %bb
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_cbranch_scc0 .LBB11_3
; SI-NEXT: ; %bb.2: ; %bb8
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 8
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, 4.0
; SI-NEXT: .LBB11_3: ; %phibb
; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
; SI-NEXT: s_cbranch_vccz .LBB11_5
; SI-NEXT: ; %bb.4: ; %bb10
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 9
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: .LBB11_5: ; %end
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB11_6:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: phi_use_def_before_kill:
; GFX10-WAVE64: ; %bb.0: ; %bb
; GFX10-WAVE64-NEXT: v_add_f32_e64 v1, s0, 1.0
; GFX10-WAVE64-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
; GFX10-WAVE64-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_6
; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_3
; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb8
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v1, 8
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 4.0
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v1, off
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT: .LBB11_3: ; %phibb
; GFX10-WAVE64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB11_5
; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb10
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT: .LBB11_5: ; %end
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB11_6:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: phi_use_def_before_kill:
; GFX10-WAVE32: ; %bb.0: ; %bb
; GFX10-WAVE32-NEXT: v_add_f32_e64 v1, s0, 1.0
; GFX10-WAVE32-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
; GFX10-WAVE32-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_6
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_3
; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb8
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v1, 8
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 4.0
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v1, off
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT: .LBB11_3: ; %phibb
; GFX10-WAVE32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB11_5
; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb10
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT: .LBB11_5: ; %end
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB11_6:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: phi_use_def_before_kill:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: v_add_f32_e64 v1, s0, 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
; GFX11-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB11_6
; GFX11-NEXT: ; %bb.1: ; %bb
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB11_3
; GFX11-NEXT: ; %bb.2: ; %bb8
; GFX11-NEXT: v_mov_b32_e32 v1, 8
; GFX11-NEXT: v_mov_b32_e32 v0, 4.0
; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: .LBB11_3: ; %phibb
; GFX11-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_cbranch_vccz .LBB11_5
; GFX11-NEXT: ; %bb.4: ; %bb10
; GFX11-NEXT: v_mov_b32_e32 v0, 9
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: .LBB11_5: ; %end
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB11_6:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
bb:
; %tmp2 is -1.0 when 0 < %x + 1.0 and 0.0 otherwise; the kill condition
; below is derived from it.
%tmp = fadd float %x, 1.000000e+00
%tmp1 = fcmp olt float 0.000000e+00, %tmp
%tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
%cmp.tmp2 = fcmp olt float %tmp2, 0.0
call void @llvm.amdgcn.kill(i1 %cmp.tmp2)
br i1 undef, label %phibb, label %bb8
phibb:
; Uses %tmp2, whose definition precedes the kill in %bb.
%tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ]
%tmp6 = fcmp oeq float %tmp5, 0.000000e+00
br i1 %tmp6, label %bb10, label %end
bb8:
store volatile i32 8, i32 addrspace(1)* undef
br label %phibb
bb10:
store volatile i32 9, i32 addrspace(1)* undef
br label %end
end:
ret void
}
; The kill in %bb6 is followed by `unreachable`, so the block containing it
; has no successors in the IR. %bb5 is also unreachable, and %bb7 (the only
; return) is reached only through the false edge of `br i1 true`.
; The expected output clears exec after the kill and falls through into the
; code for %bb3; the only s_endpgm is in the trailing early-termination block.
define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
; SI-LABEL: no_skip_no_successors:
; SI: ; %bb.0: ; %bb
; SI-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0
; SI-NEXT: s_and_b64 vcc, exec, s[4:5]
; SI-NEXT: s_cbranch_vccz .LBB12_3
; SI-NEXT: ; %bb.1: ; %bb6
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; SI-NEXT: s_cbranch_scc0 .LBB12_5
; SI-NEXT: ; %bb.2: ; %bb6
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: .LBB12_3: ; %bb3
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7ae148
; SI-NEXT: v_cmp_nge_f32_e32 vcc, s0, v0
; SI-NEXT: s_and_b64 vcc, exec, vcc
; SI-NEXT: ; %bb.4: ; %bb5
; SI-NEXT: .LBB12_5:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: no_skip_no_successors:
; GFX10-WAVE64: ; %bb.0: ; %bb
; GFX10-WAVE64-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0
; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[4:5]
; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB12_3
; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb6
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB12_5
; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb6
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: .LBB12_3: ; %bb3
; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0
; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[0:1]
; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb5
; GFX10-WAVE64-NEXT: .LBB12_5:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: no_skip_no_successors:
; GFX10-WAVE32: ; %bb.0: ; %bb
; GFX10-WAVE32-NEXT: v_cmp_nge_f32_e64 s1, s1, 0
; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s1
; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB12_3
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb6
; GFX10-WAVE32-NEXT: s_mov_b32 s2, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s2, s2, exec_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB12_5
; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb6
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: .LBB12_3: ; %bb3
; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e64 s0, 0x3e7ae148, s0
; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s0
; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb5
; GFX10-WAVE32-NEXT: .LBB12_5:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: no_skip_no_successors:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: s_and_b64 vcc, exec, s[4:5]
; GFX11-NEXT: s_cbranch_vccz .LBB12_3
; GFX11-NEXT: ; %bb.1: ; %bb6
; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], exec
; GFX11-NEXT: s_cbranch_scc0 .LBB12_5
; GFX11-NEXT: ; %bb.2: ; %bb6
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: .LBB12_3: ; %bb3
; GFX11-NEXT: v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: s_and_b64 vcc, exec, s[0:1]
; GFX11-NEXT: ; %bb.4: ; %bb5
; GFX11-NEXT: .LBB12_5:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
bb:
%tmp = fcmp ult float %arg1, 0.000000e+00
br i1 %tmp, label %bb6, label %bb3
bb3: ; preds = %bb
%tmp2 = fcmp ult float %arg, 0x3FCF5C2900000000
br i1 %tmp2, label %bb5, label %bb4
bb4: ; preds = %bb3
; Constant-true condition: the %bb7 edge is never taken.
br i1 true, label %bb5, label %bb7
bb5: ; preds = %bb4, %bb3
unreachable
bb6: ; preds = %bb
; Unconditional kill (constant false condition) with no successor block.
call void @llvm.amdgcn.kill(i1 false)
unreachable
bb7: ; preds = %bb4
ret void
}
; A conditional kill in %bb3 sits under a branch on %arg1, and the join block
; %bb4 then performs an image sample and a second conditional branch.
; The expected output saves exec on entry and switches to whole-quad mode
; (s_wqm), applies the kill to the saved mask with an and-not, branches to the
; trailing early-termination block via s_cbranch_scc0, and restores exec with
; s_or before the image sample in %bb4.
define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 {
; SI-LABEL: if_after_kill_block:
; SI: ; %bb.0: ; %bb
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: s_wqm_b64 exec, exec
; SI-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; SI-NEXT: s_cbranch_execz .LBB13_3
; SI-NEXT: ; %bb.1: ; %bb3
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB13_6
; SI-NEXT: ; %bb.2: ; %bb3
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: .LBB13_3: ; %bb4
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
; SI-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc
; SI-NEXT: s_cbranch_execz .LBB13_5
; SI-NEXT: ; %bb.4: ; %bb8
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 9
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: .LBB13_5: ; %UnifiedReturnBlock
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB13_6:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: if_after_kill_block:
; GFX10-WAVE64: ; %bb.0: ; %bb
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT: s_wqm_b64 exec, exec
; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB13_3
; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb3
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB13_6
; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb3
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: .LBB13_3: ; %bb4
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX10-WAVE64-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0)
; GFX10-WAVE64-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB13_5
; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb8
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT: .LBB13_5: ; %UnifiedReturnBlock
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB13_6:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: if_after_kill_block:
; GFX10-WAVE32: ; %bb.0: ; %bb
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB13_3
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb3
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB13_6
; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb3
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: .LBB13_3: ; %bb4
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX10-WAVE32-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0)
; GFX10-WAVE32-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB13_5
; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb8
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT: .LBB13_5: ; %UnifiedReturnBlock
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB13_6:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: if_after_kill_block:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_mov_b64 s[0:1], exec
; GFX11-NEXT: s_wqm_b64 exec, exec
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: v_cmpx_nle_f32_e32 0, v1
; GFX11-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; GFX11-NEXT: s_cbranch_execz .LBB13_3
; GFX11-NEXT: ; %bb.1: ; %bb3
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB13_6
; GFX11-NEXT: ; %bb.2: ; %bb3
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: .LBB13_3: ; %bb4
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX11-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
; GFX11-NEXT: s_mov_b64 s[0:1], exec
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cmpx_neq_f32_e32 0, v0
; GFX11-NEXT: s_cbranch_execz .LBB13_5
; GFX11-NEXT: ; %bb.4: ; %bb8
; GFX11-NEXT: v_mov_b32_e32 v0, 9
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: .LBB13_5: ; %UnifiedReturnBlock
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB13_6:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
bb:
%tmp = fcmp ult float %arg1, 0.000000e+00
br i1 %tmp, label %bb3, label %bb4
bb3: ; preds = %bb
; Lanes for which %cmp.arg is false are the ones removed by the kill.
%cmp.arg = fcmp olt float %arg, 0.0
call void @llvm.amdgcn.kill(i1 %cmp.arg)
br label %bb4
bb4: ; preds = %bb3, %bb
; Join point after the kill; the sampled value drives the second branch.
%tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp6 = extractelement <4 x float> %tmp5, i32 0
%tmp7 = fcmp une float %tmp6, 0.000000e+00
br i1 %tmp7, label %bb8, label %bb9
bb8: ; preds = %bb4
store volatile i32 9, i32 addrspace(1)* undef
ret void
bb9: ; preds = %bb4
ret void
}
; An unconditional kill (constant false condition) on one side of a branch on
; a sampled value, with both sides joining at an export.
; In the expected output the %kill block removes all currently active lanes
; from the saved live mask, branches to the trailing early-termination block
; via s_cbranch_scc0, and otherwise sets exec to 0 before the %Flow block; the
; %live side computes the product that is exported from %export.
define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
; SI-LABEL: cbranch_kill:
; SI: ; %bb.0: ; %.entry
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: v_mov_b32_e32 v4, 0
; SI-NEXT: v_mov_b32_e32 v2, v1
; SI-NEXT: v_mov_b32_e32 v3, v1
; SI-NEXT: image_sample_l v1, v[1:4], s[0:7], s[0:3] dmask:0x1 da
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; SI-NEXT: s_cbranch_execz .LBB14_3
; SI-NEXT: ; %bb.1: ; %kill
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; SI-NEXT: ; implicit-def: $vgpr0
; SI-NEXT: ; implicit-def: $vgpr1
; SI-NEXT: s_cbranch_scc0 .LBB14_6
; SI-NEXT: ; %bb.2: ; %kill
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: .LBB14_3: ; %Flow
; SI-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
; SI-NEXT: ; implicit-def: $vgpr2
; SI-NEXT: s_xor_b64 exec, exec, s[0:1]
; SI-NEXT: ; %bb.4: ; %live
; SI-NEXT: v_mul_f32_e32 v2, v0, v1
; SI-NEXT: ; %bb.5: ; %export
; SI-NEXT: s_or_b64 exec, exec, s[0:1]
; SI-NEXT: exp mrt0 v2, v2, v2, v2 done vm
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB14_6:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: cbranch_kill:
; GFX10-WAVE64: ; %bb.0: ; %.entry
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, 0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0)
; GFX10-WAVE64-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB14_3
; GFX10-WAVE64-NEXT: ; %bb.1: ; %kill
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr0
; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr1
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB14_6
; GFX10-WAVE64-NEXT: ; %bb.2: ; %kill
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: .LBB14_3: ; %Flow
; GFX10-WAVE64-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr2
; GFX10-WAVE64-NEXT: s_xor_b64 exec, exec, s[0:1]
; GFX10-WAVE64-NEXT: ; %bb.4: ; %live
; GFX10-WAVE64-NEXT: v_mul_f32_e32 v2, v0, v1
; GFX10-WAVE64-NEXT: ; %bb.5: ; %export
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v2, v2 done vm
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB14_6:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: cbranch_kill:
; GFX10-WAVE32: ; %bb.0: ; %.entry
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, 0
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0)
; GFX10-WAVE32-NEXT: v_cmp_ge_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB14_3
; GFX10-WAVE32-NEXT: ; %bb.1: ; %kill
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo
; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr0
; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr1
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB14_6
; GFX10-WAVE32-NEXT: ; %bb.2: ; %kill
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: .LBB14_3: ; %Flow
; GFX10-WAVE32-NEXT: s_or_saveexec_b32 s0, s1
; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr2
; GFX10-WAVE32-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX10-WAVE32-NEXT: ; %bb.4: ; %live
; GFX10-WAVE32-NEXT: v_mul_f32_e32 v2, v0, v1
; GFX10-WAVE32-NEXT: ; %bb.5: ; %export
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v2, v2 done vm
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB14_6:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: cbranch_kill:
; GFX11: ; %bb.0: ; %.entry
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_mov_b64 s[0:1], exec
; GFX11-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cmpx_ge_f32_e32 0, v1
; GFX11-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; GFX11-NEXT: s_cbranch_execz .LBB14_3
; GFX11-NEXT: ; %bb.1: ; %kill
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], exec
; GFX11-NEXT: ; implicit-def: $vgpr0
; GFX11-NEXT: ; implicit-def: $vgpr1
; GFX11-NEXT: s_cbranch_scc0 .LBB14_6
; GFX11-NEXT: ; %bb.2: ; %kill
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: .LBB14_3: ; %Flow
; GFX11-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
; GFX11-NEXT: ; implicit-def: $vgpr2
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_xor_b64 exec, exec, s[0:1]
; GFX11-NEXT: ; %bb.4: ; %live
; GFX11-NEXT: v_mul_f32_e32 v2, v0, v1
; GFX11-NEXT: ; %bb.5: ; %export
; GFX11-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX11-NEXT: exp mrt0 v2, v2, v2, v2 done
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB14_6:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
.entry:
%sample = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, float %val1, float %val1, float %val1, float 0.000000e+00, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
%cond0 = fcmp ugt float %sample, 0.000000e+00
br i1 %cond0, label %live, label %kill
kill:
; Constant false condition: every lane that reaches this block is killed.
call void @llvm.amdgcn.kill(i1 false)
br label %export
live:
%scale = fmul reassoc nnan nsz arcp contract float %val0, %sample
br label %export
export:
; The value coming from %kill is undef, so only the %live path supplies data.
%proxy = phi float [ undef, %kill ], [ %scale, %live ]
call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %proxy, float %proxy, float %proxy, float %proxy, i1 immarg true, i1 immarg true) #3
ret void
}
define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) {
; SI-LABEL: complex_loop:
; SI: ; %bb.0: ; %.entry
; SI-NEXT: s_cmp_lt_i32 s0, 1
; SI-NEXT: s_cbranch_scc1 .LBB15_7
; SI-NEXT: ; %bb.1: ; %.lr.ph
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: s_mov_b64 s[0:1], 0
; SI-NEXT: s_branch .LBB15_3
; SI-NEXT: .LBB15_2: ; %latch
; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_add_i32 s6, s6, 1
; SI-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
; SI-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; SI-NEXT: v_mov_b32_e32 v2, s6
; SI-NEXT: s_andn2_b64 exec, exec, s[0:1]
; SI-NEXT: s_cbranch_execz .LBB15_6
; SI-NEXT: .LBB15_3: ; %hdr
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; SI-NEXT: s_cbranch_execz .LBB15_2
; SI-NEXT: ; %bb.4: ; %kill
; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; SI-NEXT: s_cbranch_scc0 .LBB15_8
; SI-NEXT: ; %bb.5: ; %kill
; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_branch .LBB15_2
; SI-NEXT: .LBB15_6: ; %Flow
; SI-NEXT: s_or_b64 exec, exec, s[0:1]
; SI-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB15_7:
; SI-NEXT: v_mov_b32_e32 v2, -1
; SI-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB15_8:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: complex_loop:
; GFX10-WAVE64: ; %bb.0: ; %.entry
; GFX10-WAVE64-NEXT: s_cmp_lt_i32 s0, 1
; GFX10-WAVE64-NEXT: s_cbranch_scc1 .LBB15_7
; GFX10-WAVE64-NEXT: ; %bb.1: ; %.lr.ph
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: s_mov_b32 s6, 0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], 0
; GFX10-WAVE64-NEXT: s_branch .LBB15_3
; GFX10-WAVE64-NEXT: .LBB15_2: ; %latch
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX10-WAVE64-NEXT: s_add_i32 s6, s6, 1
; GFX10-WAVE64-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, s6
; GFX10-WAVE64-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB15_6
; GFX10-WAVE64-NEXT: .LBB15_3: ; %hdr
; GFX10-WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-WAVE64-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB15_2
; GFX10-WAVE64-NEXT: ; %bb.4: ; %kill
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB15_8
; GFX10-WAVE64-NEXT: ; %bb.5: ; %kill
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: s_branch .LBB15_2
; GFX10-WAVE64-NEXT: .LBB15_6: ; %Flow
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB15_7:
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, -1
; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB15_8:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: complex_loop:
; GFX10-WAVE32: ; %bb.0: ; %.entry
; GFX10-WAVE32-NEXT: s_cmp_lt_i32 s0, 1
; GFX10-WAVE32-NEXT: s_cbranch_scc1 .LBB15_7
; GFX10-WAVE32-NEXT: ; %bb.1: ; %.lr.ph
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT: s_mov_b32 s0, 0
; GFX10-WAVE32-NEXT: s_mov_b32 s2, 0
; GFX10-WAVE32-NEXT: s_branch .LBB15_3
; GFX10-WAVE32-NEXT: .LBB15_2: ; %latch
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GFX10-WAVE32-NEXT: s_add_i32 s2, s2, 1
; GFX10-WAVE32-NEXT: v_cmp_ge_i32_e32 vcc_lo, s2, v1
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, s2
; GFX10-WAVE32-NEXT: s_or_b32 s0, vcc_lo, s0
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB15_6
; GFX10-WAVE32-NEXT: .LBB15_3: ; %hdr
; GFX10-WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-WAVE32-NEXT: v_cmp_gt_u32_e32 vcc_lo, s2, v0
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX10-WAVE32-NEXT: s_xor_b32 s3, exec_lo, s3
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB15_2
; GFX10-WAVE32-NEXT: ; %bb.4: ; %kill
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, exec_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB15_8
; GFX10-WAVE32-NEXT: ; %bb.5: ; %kill
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: s_branch .LBB15_2
; GFX10-WAVE32-NEXT: .LBB15_6: ; %Flow
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB15_7:
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, -1
; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB15_8:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: complex_loop:
; GFX11: ; %bb.0: ; %.entry
; GFX11-NEXT: s_cmp_lt_i32 s0, 1
; GFX11-NEXT: s_cbranch_scc1 .LBB15_7
; GFX11-NEXT: ; %bb.1: ; %.lr.ph
; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: s_mov_b32 s6, 0
; GFX11-NEXT: s_mov_b64 s[0:1], 0
; GFX11-NEXT: s_branch .LBB15_3
; GFX11-NEXT: .LBB15_2: ; %latch
; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX11-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX11-NEXT: s_add_i32 s6, s6, 1
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
; GFX11-NEXT: v_mov_b32_e32 v2, s6
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
; GFX11-NEXT: s_cbranch_execz .LBB15_6
; GFX11-NEXT: .LBB15_3: ; %hdr
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: s_mov_b64 s[4:5], exec
; GFX11-NEXT: v_cmpx_gt_u32_e64 s6, v0
; GFX11-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX11-NEXT: s_cbranch_execz .LBB15_2
; GFX11-NEXT: ; %bb.4: ; %kill
; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], exec
; GFX11-NEXT: s_cbranch_scc0 .LBB15_8
; GFX11-NEXT: ; %bb.5: ; %kill
; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: s_branch .LBB15_2
; GFX11-NEXT: .LBB15_6: ; %Flow
; GFX11-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX11-NEXT: exp mrt0 v2, v2, v0, v0 done
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB15_7:
; GFX11-NEXT: v_mov_b32_e32 v2, -1
; GFX11-NEXT: exp mrt0 v2, v2, v0, v0 done
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB15_8:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
.entry:
%flaga = icmp sgt i32 %cmpa, 0
br i1 %flaga, label %.lr.ph, label %._crit_edge
.lr.ph:
br label %hdr
hdr:
%ctr = phi i32 [ 0, %.lr.ph ], [ %ctr.next, %latch ]
%flagb = icmp ugt i32 %ctr, %cmpb
br i1 %flagb, label %kill, label %latch
kill:
call void @llvm.amdgcn.kill(i1 false)
br label %latch
latch:
%ctr.next = add nuw nsw i32 %ctr, 1
%flagc = icmp slt i32 %ctr.next, %cmpc
br i1 %flagc, label %hdr, label %._crit_edge
._crit_edge:
%tmp = phi i32 [ -1, %.entry ], [ %ctr.next, %latch ]
%out = bitcast i32 %tmp to float
call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %out, float %out, float undef, float undef, i1 immarg true, i1 immarg true)
ret void
}
; skip_mode_switch: a function with the default calling convention (the
; expected code returns with s_setpc_b64) that branches on %arg == 0 around a
; block whose only instruction is a call to llvm.amdgcn.s.setreg.
;
; %arg is compared in a VGPR (v0) in the expected output, so the branch is
; lowered by masking exec: s_and_saveexec on SI and GFX10, s_mov_b64 plus
; v_cmpx on GFX11. Every configuration is expected to keep the
; s_cbranch_execz that jumps over the s_setreg_imm32_b32 when no lanes remain
; active, and to restore exec with s_or at the join block.
; NOTE(review): presumably the skip branch must stay because the mode register
; write would otherwise take effect with an empty exec mask - confirm against
; the pass that inserts skip branches.
define void @skip_mode_switch(i32 %arg) {
; SI-LABEL: skip_mode_switch:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT: s_cbranch_execz .LBB16_2
; SI-NEXT: ; %bb.1: ; %bb.0
; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; SI-NEXT: .LBB16_2: ; %bb.1
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-WAVE64-LABEL: skip_mode_switch:
; GFX10-WAVE64: ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB16_2
; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb.0
; GFX10-WAVE64-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; GFX10-WAVE64-NEXT: .LBB16_2: ; %bb.1
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX10-WAVE64-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-WAVE32-LABEL: skip_mode_switch:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB16_2
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb.0
; GFX10-WAVE32-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; GFX10-WAVE32-NEXT: .LBB16_2: ; %bb.1
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-WAVE32-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: skip_mode_switch:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b64 s[0:1], exec
; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX11-NEXT: s_cbranch_execz .LBB16_2
; GFX11-NEXT: ; %bb.1: ; %bb.0
; GFX11-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; GFX11-NEXT: .LBB16_2: ; %bb.1
; GFX11-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%cmp = icmp eq i32 %arg, 0
br i1 %cmp, label %bb.0, label %bb.1
bb.0:
; The immediate 2049 is printed as hwreg(HW_REG_MODE, 0, 2) in the expected
; assembly above; 3 is the value written to that field.
call void @llvm.amdgcn.s.setreg(i32 2049, i32 3)
br label %bb.1
bb.1:
ret void
}
; Intrinsic declarations for the tests in this file.
; Export intrinsic; complex_loop calls it in its exit block.
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3
; Image sample intrinsics; no call is visible near the end of the file, so
; these are presumably used by earlier tests (verify before removing).
declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
; Kill intrinsic taking an i1 operand; the test_kill_* functions and
; complex_loop call it.
declare void @llvm.amdgcn.kill(i1) #0
; Hardware register write; skip_mode_switch calls it. It carries no attribute
; group.
declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)
; Attribute groups. #0 is attached to the test functions and to the kill
; declaration, #1 to the image sample declarations, and #3 to the export
; declaration. No use of #2 is visible in this part of the file.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { inaccessiblememonly nounwind writeonly }