The compiler was generating symbols in the final code object for local branch target labels. These symbols bloat the code object and slow down the loader, and they are only used to simplify disassembly. Use '--symbolize-operands' with llvm-objdump instead to improve the readability of branch target operands in disassembly. Fixes: SWDEV-312223. Reviewed By: scott.linder. Differential Revision: https://reviews.llvm.org/D114273
1590 lines
59 KiB
LLVM
1590 lines
59 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,SI %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,GFX10-WAVE64 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX10-WAVE32 %s

; Kill with a constant-true condition.
; The expected output for every run line is a lone s_endpgm: no exec update
; and no early-exit block are emitted.
define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
; GCN-LABEL: test_kill_depth_0_imm_pos:
; GCN: ; %bb.0:
; GCN-NEXT: s_endpgm
  call void @llvm.amdgcn.kill(i1 true)
  ret void
}
; Kill with a constant-false condition.
; The expected code applies s_andn2 to exec using exec itself as the mask,
; then branches on SCC0 to a block that zeroes exec, issues a null export
; marked "done", and ends the program.
define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
; WAVE64-LABEL: test_kill_depth_0_imm_neg:
; WAVE64: ; %bb.0:
; WAVE64-NEXT: s_andn2_b64 exec, exec, exec
; WAVE64-NEXT: s_cbranch_scc0 .LBB1_1
; WAVE64-NEXT: s_endpgm
; WAVE64-NEXT: .LBB1_1:
; WAVE64-NEXT: s_mov_b64 exec, 0
; WAVE64-NEXT: exp null off, off, off, off done vm
; WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, exec_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB1_1
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB1_1:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
  call void @llvm.amdgcn.kill(i1 false)
  ret void
}
; FIXME: Ideally only one early-exit would be emitted
;
; Two back-to-back kills with a constant-false condition.
; The expected code keeps a copy of exec in s[0:1] (s0 for wave32), updates it
; with s_andn2 before each of the two conditional branches, and sends both
; branches to the same exit block (.LBB2_2).
define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
; WAVE64-LABEL: test_kill_depth_0_imm_neg_x2:
; WAVE64: ; %bb.0:
; WAVE64-NEXT: s_mov_b64 s[0:1], exec
; WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; WAVE64-NEXT: s_cbranch_scc0 .LBB2_2
; WAVE64-NEXT: ; %bb.1:
; WAVE64-NEXT: s_mov_b64 exec, 0
; WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; WAVE64-NEXT: s_cbranch_scc0 .LBB2_2
; WAVE64-NEXT: s_endpgm
; WAVE64-NEXT: .LBB2_2:
; WAVE64-NEXT: s_mov_b64 exec, 0
; WAVE64-NEXT: exp null off, off, off, off done vm
; WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg_x2:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB2_2
; GFX10-WAVE32-NEXT: ; %bb.1:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB2_2
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB2_2:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
  call void @llvm.amdgcn.kill(i1 false)
  call void @llvm.amdgcn.kill(i1 false)
  ret void
}
; Kill driven by a per-lane condition (%x < 0.0, ordered compare).
; The expected code materialises the condition in vcc with v_cmp_ngt_f32,
; applies it to exec with s_andn2, and branches on SCC0 to the exit block that
; zeroes exec and performs the null export.
define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
; WAVE64-LABEL: test_kill_depth_var:
; WAVE64: ; %bb.0:
; WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; WAVE64-NEXT: s_cbranch_scc0 .LBB3_1
; WAVE64-NEXT: s_endpgm
; WAVE64-NEXT: .LBB3_1:
; WAVE64-NEXT: s_mov_b64 exec, 0
; WAVE64-NEXT: exp null off, off, off, off done vm
; WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB3_1
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB3_1:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
  %cmp = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp)
  ret void
}
; FIXME: Ideally only one early-exit would be emitted
;
; The same per-lane condition (%x < 0.0) feeds two consecutive kills.
; The expected code emits the v_cmp_ngt_f32 twice, tracks the surviving lanes
; in s[0:1] (s0 for wave32), and routes both conditional branches to the same
; exit block (.LBB4_2).
define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
; SI-LABEL: test_kill_depth_var_x2_same:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB4_2
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB4_2
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB4_2:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_same:
; GFX10-WAVE64: ; %bb.0:
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB4_2
; GFX10-WAVE64-NEXT: ; %bb.1:
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB4_2
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB4_2:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_same:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB4_2
; GFX10-WAVE32-NEXT: ; %bb.1:
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB4_2
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB4_2:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
  %cmp = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp)
  call void @llvm.amdgcn.kill(i1 %cmp)
  ret void
}
; FIXME: Ideally only one early-exit would be emitted
;
; Two consecutive kills with independent per-lane conditions (%x < 0.0, then
; %y < 0.0). The expected code compares v0 and then v1, tracks the surviving
; lanes in s[0:1] (s0 for wave32), and routes both conditional branches to the
; same exit block (.LBB5_2).
define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
; SI-LABEL: test_kill_depth_var_x2:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB5_2
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB5_2
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB5_2:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2:
; GFX10-WAVE64: ; %bb.0:
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB5_2
; GFX10-WAVE64-NEXT: ; %bb.1:
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB5_2
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB5_2:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB5_2
; GFX10-WAVE32-NEXT: ; %bb.1:
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB5_2
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB5_2:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
  %cmp.x = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.x)
  %cmp.y = fcmp olt float %y, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.y)
  ret void
}
; Two kills separated by a side-effecting inline asm that defines v7.
; The first kill tests %x < 0.0; the second tests the asm result < 0.0.
; The expected code places the asm between the first exec update and the second
; compare, and routes both conditional branches to the same exit block
; (.LBB6_2).
define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
; SI-LABEL: test_kill_depth_var_x2_instructions:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB6_2
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v7, -1
; SI-NEXT: ;;#ASMEND
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB6_2
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB6_2:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_instructions:
; GFX10-WAVE64: ; %bb.0:
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB6_2
; GFX10-WAVE64-NEXT: ; %bb.1:
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB6_2
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB6_2:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_instructions:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB6_2
; GFX10-WAVE32-NEXT: ; %bb.1:
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB6_2
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB6_2:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
  %cmp.x = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.x)
  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
  %cmp.y = fcmp olt float %y, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.y)
  ret void
}
; FIXME: why does the skip depend on the asm length in the same block?
;
; Kill inside one arm of a branch on an inreg (SGPR) argument.
; %bb runs an inline asm (v_mov to v7 followed by ten v_nop_e64) and kills on
; the asm result < 0.0; both paths then return 1.0. The expected code lowers
; the branch with s_cmp_lg_u32 / s_cbranch_scc0 and places the exit block
; (.LBB7_4) before the shared return label (.LBB7_5).
define amdgpu_ps float @test_kill_control_flow(i32 inreg %arg) #0 {
; SI-LABEL: test_kill_control_flow:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_cmp_lg_u32 s0, 0
; SI-NEXT: s_cbranch_scc0 .LBB7_2
; SI-NEXT: ; %bb.1: ; %exit
; SI-NEXT: v_mov_b32_e32 v0, 1.0
; SI-NEXT: s_branch .LBB7_5
; SI-NEXT: .LBB7_2: ; %bb
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v7, -1
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: ;;#ASMEND
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; SI-NEXT: s_cbranch_scc0 .LBB7_4
; SI-NEXT: ; %bb.3: ; %bb
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: v_mov_b32_e32 v0, 1.0
; SI-NEXT: s_branch .LBB7_5
; SI-NEXT: .LBB7_4:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB7_5:
;
; GFX10-WAVE64-LABEL: test_kill_control_flow:
; GFX10-WAVE64: ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB7_2
; GFX10-WAVE64-NEXT: ; %bb.1: ; %exit
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 1.0
; GFX10-WAVE64-NEXT: s_branch .LBB7_5
; GFX10-WAVE64-NEXT: .LBB7_2: ; %bb
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB7_4
; GFX10-WAVE64-NEXT: ; %bb.3: ; %bb
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 1.0
; GFX10-WAVE64-NEXT: s_branch .LBB7_5
; GFX10-WAVE64-NEXT: .LBB7_4:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB7_5:
;
; GFX10-WAVE32-LABEL: test_kill_control_flow:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB7_2
; GFX10-WAVE32-NEXT: ; %bb.1: ; %exit
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 1.0
; GFX10-WAVE32-NEXT: s_branch .LBB7_5
; GFX10-WAVE32-NEXT: .LBB7_2: ; %bb
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB7_4
; GFX10-WAVE32-NEXT: ; %bb.3: ; %bb
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 1.0
; GFX10-WAVE32-NEXT: s_branch .LBB7_5
; GFX10-WAVE32-NEXT: .LBB7_4:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB7_5:
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  ; Everything between the quotes below, line breaks included, is the inline
  ; asm text, so no separator or comment lines may appear inside the string.
  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64", "={v7}"()
  %cmp.var = fcmp olt float %var, 0.0
; TODO: We could do an early-exit here (the branch above is uniform!)
  call void @llvm.amdgcn.kill(i1 %cmp.var)
  br label %exit

exit:
  ret float 1.0
}
; Kill inside one arm of a branch on an inreg (SGPR) argument, with code left
; over after the kill in the same block.
; %bb defines v7 (asm with eleven v_nop_e64) and v8, kills on the v7 value
; < 0.0, then does a volatile store of the v8 value and defines v9, which
; reaches %exit through a phi. The expected code keeps the v8 asm ahead of the
; conditional branch to the exit block (.LBB8_4) and the store and v9 asm
; after it.
define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
; SI-LABEL: test_kill_control_flow_remainder:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_cmp_lg_u32 s0, 0
; SI-NEXT: v_mov_b32_e32 v9, 0
; SI-NEXT: s_cbranch_scc1 .LBB8_3
; SI-NEXT: ; %bb.1: ; %bb
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v7, -1
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: ;;#ASMEND
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v8, -1
; SI-NEXT: ;;#ASMEND
; SI-NEXT: s_cbranch_scc0 .LBB8_4
; SI-NEXT: ; %bb.2: ; %bb
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v9, -2
; SI-NEXT: ;;#ASMEND
; SI-NEXT: .LBB8_3: ; %exit
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB8_4:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_control_flow_remainder:
; GFX10-WAVE64: ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v9, 0
; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB8_2
; GFX10-WAVE64-NEXT: ; %bb.1: ; %exit
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v9, off
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB8_2: ; %bb
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v8, -1
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB8_4
; GFX10-WAVE64-NEXT: ; %bb.3: ; %bb
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v8, off
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v9, -2
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v9, off
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB8_4:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_control_flow_remainder:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v9, 0
; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB8_2
; GFX10-WAVE32-NEXT: ; %bb.1: ; %exit
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v9, off
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB8_2: ; %bb
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v8, -1
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB8_4
; GFX10-WAVE32-NEXT: ; %bb.3: ; %bb
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v8, off
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v9, -2
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v9, off
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB8_4:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  ; Everything between the quotes below, line breaks included, is the inline
  ; asm text, so no separator or comment lines may appear inside the string.
  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64", "={v7}"()
  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
  %cmp.var = fcmp olt float %var, 0.0
; TODO: We could do an early-exit here (the branch above is uniform!)
  call void @llvm.amdgcn.kill(i1 %cmp.var)
  store volatile float %live.across, float addrspace(1)* undef
  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
  br label %exit

exit:
  %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
  store float %phi, float addrspace(1)* undef
  ret void
}
; Kill in the entry block followed by a branch; the function returns a value.
; The kill condition is %arg == 1 and the branch condition is %arg == 0, both
; on an inreg (SGPR) argument. %bb defines v7 through an inline asm (v_mov
; followed by ten v_nop_e64) and %exit returns either that value or 0.0.
; The expected code builds the kill mask with s_cselect / s_xor, branches to
; the exit block (.LBB9_4) on SCC0, and otherwise falls through to the scalar
; branch.
define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
; SI-LABEL: test_kill_control_flow_return:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_cmp_eq_u32 s0, 1
; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], exec
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
; SI-NEXT: s_cbranch_scc0 .LBB9_4
; SI-NEXT: ; %bb.1: ; %entry
; SI-NEXT: s_and_b64 exec, exec, s[2:3]
; SI-NEXT: s_cmp_lg_u32 s0, 0
; SI-NEXT: v_mov_b32_e32 v0, 0
; SI-NEXT: s_cbranch_scc0 .LBB9_3
; SI-NEXT: ; %bb.2: ; %exit
; SI-NEXT: s_branch .LBB9_5
; SI-NEXT: .LBB9_3: ; %bb
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v7, -1
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: ;;#ASMEND
; SI-NEXT: v_mov_b32_e32 v0, v7
; SI-NEXT: s_branch .LBB9_5
; SI-NEXT: .LBB9_4:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB9_5:
;
; GFX10-WAVE64-LABEL: test_kill_control_flow_return:
; GFX10-WAVE64: ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT: s_cmp_eq_u32 s0, 1
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: s_cselect_b64 s[4:5], -1, 0
; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], s[4:5], exec
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB9_4
; GFX10-WAVE64-NEXT: ; %bb.1: ; %entry
; GFX10-WAVE64-NEXT: s_and_b64 exec, exec, s[2:3]
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB9_3
; GFX10-WAVE64-NEXT: ; %bb.2: ; %exit
; GFX10-WAVE64-NEXT: s_branch .LBB9_5
; GFX10-WAVE64-NEXT: .LBB9_3: ; %bb
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, v7
; GFX10-WAVE64-NEXT: s_branch .LBB9_5
; GFX10-WAVE64-NEXT: .LBB9_4:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB9_5:
;
; GFX10-WAVE32-LABEL: test_kill_control_flow_return:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: s_cmp_eq_u32 s0, 1
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT: s_cselect_b32 s2, -1, 0
; GFX10-WAVE32-NEXT: s_xor_b32 s2, s2, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, s2
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB9_4
; GFX10-WAVE32-NEXT: ; %bb.1: ; %entry
; GFX10-WAVE32-NEXT: s_and_b32 exec_lo, exec_lo, s1
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB9_3
; GFX10-WAVE32-NEXT: ; %bb.2: ; %exit
; GFX10-WAVE32-NEXT: s_branch .LBB9_5
; GFX10-WAVE32-NEXT: .LBB9_3: ; %bb
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, v7
; GFX10-WAVE32-NEXT: s_branch .LBB9_5
; GFX10-WAVE32-NEXT: .LBB9_4:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB9_5:
entry:
  %kill = icmp eq i32 %arg, 1
  %cmp = icmp eq i32 %arg, 0
  call void @llvm.amdgcn.kill(i1 %kill)
  br i1 %cmp, label %bb, label %exit

bb:
  ; Everything between the quotes below, line breaks included, is the inline
  ; asm text, so no separator or comment lines may appear inside the string.
  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64", "={v7}"()
  br label %exit

exit:
  %ret = phi float [ %var, %bb ], [ 0.0, %entry ]
  ret float %ret
}
; Kill inside a loop that is entered through a branch on a non-inreg (VGPR)
; argument.
; Each iteration of %bb runs an inline asm (v_mov to v7 followed by ten
; v_nop_e64), kills on the asm result < 0.0, and then does a volatile load
; whose value decides whether to loop again. %exit does a volatile store of 8.
; The expected code guards the loop with s_and_saveexec / s_xor, tracks the
; surviving lanes in s[0:1] (s0 for wave32) across iterations, and places the
; exit block after the normal s_endpgm.
define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
; SI-LABEL: test_kill_divergent_loop:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT: s_xor_b64 s[4:5], exec, s[2:3]
; SI-NEXT: s_cbranch_execz .LBB10_4
; SI-NEXT: ; %bb.1: ; %bb.preheader
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: .LBB10_2: ; %bb
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v7, -1
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: ;;#ASMEND
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB10_5
; SI-NEXT: ; %bb.3: ; %bb
; SI-NEXT: ; in Loop: Header=BB10_2 Depth=1
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_b64 vcc, exec, vcc
; SI-NEXT: s_cbranch_vccnz .LBB10_2
; SI-NEXT: .LBB10_4: ; %Flow1
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 8
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB10_5:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_divergent_loop:
; GFX10-WAVE64: ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB10_3
; GFX10-WAVE64-NEXT: .LBB10_1: ; %bb
; GFX10-WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB10_4
; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB10_1 Depth=1
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: global_load_dword v0, v[0:1], off glc dlc
; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0)
; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, vcc
; GFX10-WAVE64-NEXT: s_cbranch_vccnz .LBB10_1
; GFX10-WAVE64-NEXT: .LBB10_3: ; %Flow1
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 8
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB10_4:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_divergent_loop:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB10_3
; GFX10-WAVE32-NEXT: .LBB10_1: ; %bb
; GFX10-WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB10_4
; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB10_1 Depth=1
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: global_load_dword v0, v[0:1], off glc dlc
; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0)
; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_vccnz .LBB10_1
; GFX10-WAVE32-NEXT: .LBB10_3: ; %Flow1
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 8
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB10_4:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  ; Everything between the quotes below, line breaks included, is the inline
  ; asm text, so no separator or comment lines may appear inside the string.
  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64
v_nop_e64", "={v7}"()
  %cmp.var = fcmp olt float %var, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.var)
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  %loop.cond = icmp eq i32 %vgpr, 0
  br i1 %loop.cond, label %bb, label %exit

exit:
  store volatile i32 8, i32 addrspace(1)* undef
  ret void
}
|
|
|
|
; bug 28550
;
; %tmp2 is computed ahead of the kill in %bb and is read again afterwards by
; the phi in %phibb (the other incoming value, 4.0, comes from %bb8), so the
; value has to remain available across the kill sequence.
define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
; SI-LABEL: phi_use_def_before_kill:
; SI: ; %bb.0: ; %bb
; SI-NEXT: v_add_f32_e64 v1, s0, 1.0
; SI-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; SI-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_cbranch_scc0 .LBB11_6
; SI-NEXT: ; %bb.1: ; %bb
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_cbranch_scc0 .LBB11_3
; SI-NEXT: ; %bb.2: ; %bb8
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 8
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, 4.0
; SI-NEXT: .LBB11_3: ; %phibb
; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
; SI-NEXT: s_and_b64 vcc, exec, vcc
; SI-NEXT: s_cbranch_vccz .LBB11_5
; SI-NEXT: ; %bb.4: ; %bb10
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 9
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: .LBB11_5: ; %end
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB11_6:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: phi_use_def_before_kill:
; GFX10-WAVE64: ; %bb.0: ; %bb
; GFX10-WAVE64-NEXT: v_add_f32_e64 v1, s0, 1.0
; GFX10-WAVE64-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
; GFX10-WAVE64-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_6
; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_3
; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb8
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v1, 8
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 4.0
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v1, off
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT: .LBB11_3: ; %phibb
; GFX10-WAVE64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, vcc
; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB11_5
; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb10
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT: .LBB11_5: ; %end
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB11_6:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: phi_use_def_before_kill:
; GFX10-WAVE32: ; %bb.0: ; %bb
; GFX10-WAVE32-NEXT: v_add_f32_e64 v1, s0, 1.0
; GFX10-WAVE32-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
; GFX10-WAVE32-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_6
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_3
; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb8
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v1, 8
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 4.0
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v1, off
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT: .LBB11_3: ; %phibb
; GFX10-WAVE32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB11_5
; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb10
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT: .LBB11_5: ; %end
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB11_6:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
bb:
  ; %tmp2 is -1.0 when 0.0 < %x + 1.0 (ordered), otherwise 0.0.
  %tmp = fadd float %x, 1.000000e+00
  %tmp1 = fcmp olt float 0.000000e+00, %tmp
  %tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
  %cmp.tmp2 = fcmp olt float %tmp2, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.tmp2)
  br i1 undef, label %phibb, label %bb8

phibb:
  ; Use of %tmp2 after the kill.
  %tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ]
  %tmp6 = fcmp oeq float %tmp5, 0.000000e+00
  br i1 %tmp6, label %bb10, label %end

bb8:
  store volatile i32 8, i32 addrspace(1)* undef
  br label %phibb

bb10:
  store volatile i32 9, i32 addrspace(1)* undef
  br label %end

end:
  ret void
}
|
|
|
|
; The kill in %bb6 is immediately followed by `unreachable`, so the block that
; performs the kill has no successors. %bb5 also ends in `unreachable`; %bb7
; is the only block that returns.
define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
; SI-LABEL: no_skip_no_successors:
; SI: ; %bb.0: ; %bb
; SI-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0
; SI-NEXT: s_and_b64 vcc, exec, s[4:5]
; SI-NEXT: s_cbranch_vccz .LBB12_3
; SI-NEXT: ; %bb.1: ; %bb6
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; SI-NEXT: s_cbranch_scc0 .LBB12_5
; SI-NEXT: ; %bb.2: ; %bb6
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: .LBB12_3: ; %bb3
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7ae148
; SI-NEXT: v_cmp_nge_f32_e32 vcc, s0, v0
; SI-NEXT: s_and_b64 vcc, exec, vcc
; SI-NEXT: ; %bb.4: ; %bb5
; SI-NEXT: .LBB12_5:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: no_skip_no_successors:
; GFX10-WAVE64: ; %bb.0: ; %bb
; GFX10-WAVE64-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0
; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[4:5]
; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB12_3
; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb6
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB12_5
; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb6
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: .LBB12_3: ; %bb3
; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0
; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[0:1]
; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb5
; GFX10-WAVE64-NEXT: .LBB12_5:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: no_skip_no_successors:
; GFX10-WAVE32: ; %bb.0: ; %bb
; GFX10-WAVE32-NEXT: v_cmp_nge_f32_e64 s1, s1, 0
; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s1
; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB12_3
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb6
; GFX10-WAVE32-NEXT: s_mov_b32 s2, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s2, s2, exec_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB12_5
; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb6
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: .LBB12_3: ; %bb3
; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e64 s0, 0x3e7ae148, s0
; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s0
; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb5
; GFX10-WAVE32-NEXT: .LBB12_5:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
bb:
  ; Both comparisons are evaluated up front; %tmp2 is only consumed in %bb3.
  %tmp = fcmp ult float %arg1, 0.000000e+00
  %tmp2 = fcmp ult float %arg, 0x3FCF5C2900000000
  br i1 %tmp, label %bb6, label %bb3

bb3: ; preds = %bb
  br i1 %tmp2, label %bb5, label %bb4

bb4: ; preds = %bb3
  ; Constant-true condition: the %bb7 edge is never taken.
  br i1 true, label %bb5, label %bb7

bb5: ; preds = %bb4, %bb3
  unreachable

bb6: ; preds = %bb
  ; Kill block with no successors.
  call void @llvm.amdgcn.kill(i1 false)
  unreachable

bb7: ; preds = %bb4
  ret void
}
|
|
|
|
; The conditional kill in %bb3 falls through to %bb4, which samples an image
; and then branches a second time on the sampled value, i.e. there is another
; `if` directly after the block containing the kill.
define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 {
; SI-LABEL: if_after_kill_block:
; SI: ; %bb.0: ; %bb
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: s_wqm_b64 exec, exec
; SI-NEXT: s_mov_b32 s0, 0
; SI-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; SI-NEXT: s_cbranch_execz .LBB13_3
; SI-NEXT: ; %bb.1: ; %bb3
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; SI-NEXT: s_cbranch_scc0 .LBB13_6
; SI-NEXT: ; %bb.2: ; %bb3
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: .LBB13_3: ; %bb4
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_mov_b32 s1, s0
; SI-NEXT: s_mov_b32 s2, s0
; SI-NEXT: s_mov_b32 s3, s0
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s0
; SI-NEXT: s_mov_b32 s6, s0
; SI-NEXT: s_mov_b32 s7, s0
; SI-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc
; SI-NEXT: s_cbranch_execz .LBB13_5
; SI-NEXT: ; %bb.4: ; %bb8
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 9
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: .LBB13_5: ; %UnifiedReturnBlock
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB13_6:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: if_after_kill_block:
; GFX10-WAVE64: ; %bb.0: ; %bb
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: s_wqm_b64 exec, exec
; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT: s_mov_b32 s0, 0
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB13_3
; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb3
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB13_6
; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb3
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: .LBB13_3: ; %bb4
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX10-WAVE64-NEXT: s_mov_b32 s1, s0
; GFX10-WAVE64-NEXT: s_mov_b32 s2, s0
; GFX10-WAVE64-NEXT: s_mov_b32 s3, s0
; GFX10-WAVE64-NEXT: s_mov_b32 s4, s0
; GFX10-WAVE64-NEXT: s_mov_b32 s5, s0
; GFX10-WAVE64-NEXT: s_mov_b32 s6, s0
; GFX10-WAVE64-NEXT: s_mov_b32 s7, s0
; GFX10-WAVE64-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0)
; GFX10-WAVE64-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB13_5
; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb8
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT: .LBB13_5: ; %UnifiedReturnBlock
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB13_6:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: if_after_kill_block:
; GFX10-WAVE32: ; %bb.0: ; %bb
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT: s_mov_b32 s0, 0
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s2, vcc_lo
; GFX10-WAVE32-NEXT: s_xor_b32 s2, exec_lo, s2
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB13_3
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb3
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB13_6
; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb3
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: .LBB13_3: ; %bb4
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX10-WAVE32-NEXT: s_mov_b32 s1, s0
; GFX10-WAVE32-NEXT: s_mov_b32 s2, s0
; GFX10-WAVE32-NEXT: s_mov_b32 s3, s0
; GFX10-WAVE32-NEXT: s_mov_b32 s4, s0
; GFX10-WAVE32-NEXT: s_mov_b32 s5, s0
; GFX10-WAVE32-NEXT: s_mov_b32 s6, s0
; GFX10-WAVE32-NEXT: s_mov_b32 s7, s0
; GFX10-WAVE32-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0)
; GFX10-WAVE32-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB13_5
; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb8
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT: .LBB13_5: ; %UnifiedReturnBlock
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB13_6:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
bb:
  %tmp = fcmp ult float %arg1, 0.000000e+00
  br i1 %tmp, label %bb3, label %bb4

bb3: ; preds = %bb
  ; Conditional kill, then fall through to the join block.
  %cmp.arg = fcmp olt float %arg, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.arg)
  br label %bb4

bb4: ; preds = %bb3, %bb
  ; Second branch, taken on element 0 of the sample result.
  %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
  %tmp6 = extractelement <4 x float> %tmp5, i32 0
  %tmp7 = fcmp une float %tmp6, 0.000000e+00
  br i1 %tmp7, label %bb8, label %bb9

bb8: ; preds = %bb4
  store volatile i32 9, i32 addrspace(1)* undef
  ret void

bb9: ; preds = %bb4
  ret void
}
|
|
|
|
; Branches on the result of an image sample. The %kill arm calls kill(false)
; and contributes undef to the phi in %export; the %live arm contributes
; %val0 * %sample. %export then exports the phi value in all four channels.
define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
; SI-LABEL: cbranch_kill:
; SI: ; %bb.0: ; %.entry
; SI-NEXT: s_mov_b32 s4, 0
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: v_mov_b32_e32 v2, v1
; SI-NEXT: v_mov_b32_e32 v3, v1
; SI-NEXT: s_mov_b32 s5, s4
; SI-NEXT: s_mov_b32 s6, s4
; SI-NEXT: s_mov_b32 s7, s4
; SI-NEXT: s_mov_b32 s8, s4
; SI-NEXT: s_mov_b32 s9, s4
; SI-NEXT: s_mov_b32 s10, s4
; SI-NEXT: s_mov_b32 s11, s4
; SI-NEXT: image_sample_lz v1, v[1:3], s[4:11], s[0:3] dmask:0x1 da
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; SI-NEXT: s_cbranch_execz .LBB14_3
; SI-NEXT: ; %bb.1: ; %kill
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; SI-NEXT: ; implicit-def: $vgpr0
; SI-NEXT: ; implicit-def: $vgpr1
; SI-NEXT: s_cbranch_scc0 .LBB14_6
; SI-NEXT: ; %bb.2: ; %kill
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: .LBB14_3: ; %Flow
; SI-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
; SI-NEXT: ; implicit-def: $vgpr2
; SI-NEXT: s_xor_b64 exec, exec, s[0:1]
; SI-NEXT: ; %bb.4: ; %live
; SI-NEXT: v_mul_f32_e32 v2, v0, v1
; SI-NEXT: ; %bb.5: ; %export
; SI-NEXT: s_or_b64 exec, exec, s[0:1]
; SI-NEXT: exp mrt0 v2, v2, v2, v2 done vm
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB14_6:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: cbranch_kill:
; GFX10-WAVE64: ; %bb.0: ; %.entry
; GFX10-WAVE64-NEXT: s_mov_b32 s4, 0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT: s_mov_b32 s5, s4
; GFX10-WAVE64-NEXT: s_mov_b32 s6, s4
; GFX10-WAVE64-NEXT: s_mov_b32 s7, s4
; GFX10-WAVE64-NEXT: s_mov_b32 s8, s4
; GFX10-WAVE64-NEXT: s_mov_b32 s9, s4
; GFX10-WAVE64-NEXT: s_mov_b32 s10, s4
; GFX10-WAVE64-NEXT: s_mov_b32 s11, s4
; GFX10-WAVE64-NEXT: image_sample_lz v1, [v1, v1, v1], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0)
; GFX10-WAVE64-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB14_3
; GFX10-WAVE64-NEXT: ; %bb.1: ; %kill
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr0
; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr1
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB14_6
; GFX10-WAVE64-NEXT: ; %bb.2: ; %kill
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: .LBB14_3: ; %Flow
; GFX10-WAVE64-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr2
; GFX10-WAVE64-NEXT: s_xor_b64 exec, exec, s[0:1]
; GFX10-WAVE64-NEXT: ; %bb.4: ; %live
; GFX10-WAVE64-NEXT: v_mul_f32_e32 v2, v0, v1
; GFX10-WAVE64-NEXT: ; %bb.5: ; %export
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v2, v2 done vm
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB14_6:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: cbranch_kill:
; GFX10-WAVE32: ; %bb.0: ; %.entry
; GFX10-WAVE32-NEXT: s_mov_b32 s4, 0
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: s_mov_b32 s5, s4
; GFX10-WAVE32-NEXT: s_mov_b32 s6, s4
; GFX10-WAVE32-NEXT: s_mov_b32 s7, s4
; GFX10-WAVE32-NEXT: s_mov_b32 s8, s4
; GFX10-WAVE32-NEXT: s_mov_b32 s9, s4
; GFX10-WAVE32-NEXT: s_mov_b32 s10, s4
; GFX10-WAVE32-NEXT: s_mov_b32 s11, s4
; GFX10-WAVE32-NEXT: image_sample_lz v1, [v1, v1, v1], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0)
; GFX10-WAVE32-NEXT: v_cmp_ge_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB14_3
; GFX10-WAVE32-NEXT: ; %bb.1: ; %kill
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo
; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr0
; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr1
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB14_6
; GFX10-WAVE32-NEXT: ; %bb.2: ; %kill
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: .LBB14_3: ; %Flow
; GFX10-WAVE32-NEXT: s_or_saveexec_b32 s0, s1
; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr2
; GFX10-WAVE32-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX10-WAVE32-NEXT: ; %bb.4: ; %live
; GFX10-WAVE32-NEXT: v_mul_f32_e32 v2, v0, v1
; GFX10-WAVE32-NEXT: ; %bb.5: ; %export
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v2, v2 done vm
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB14_6:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
.entry:
  ; All three coordinates of the sample are %val1.
  %sample = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, float %val1, float %val1, float %val1, float 0.000000e+00, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
  %cond0 = fcmp ugt float %sample, 0.000000e+00
  br i1 %cond0, label %live, label %kill

kill:
  call void @llvm.amdgcn.kill(i1 false)
  br label %export

live:
  %scale = fmul reassoc nnan nsz arcp contract float %val0, %sample
  br label %export

export:
  ; The value arriving from %kill is undef.
  %proxy = phi float [ undef, %kill ], [ %scale, %live ]
  call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %proxy, float %proxy, float %proxy, float %proxy, i1 immarg true, i1 immarg true) #3
  ret void
}
|
|
|
|
|
|
; Counted loop with an unconditional kill(false) on one path through the body.
; %hdr enters %kill when %ctr > %cmpb (unsigned); both paths rejoin in %latch,
; which keeps looping while %ctr.next < %cmpc (signed). The loop is skipped
; entirely unless %cmpa > 0. The final counter, or -1 when the loop was
; skipped, is bitcast to float and exported.
define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) {
; SI-LABEL: complex_loop:
; SI: ; %bb.0: ; %.entry
; SI-NEXT: s_cmp_lt_i32 s0, 1
; SI-NEXT: v_mov_b32_e32 v2, -1
; SI-NEXT: s_cbranch_scc1 .LBB15_7
; SI-NEXT: ; %bb.1: ; %.lr.ph
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: s_mov_b64 s[0:1], 0
; SI-NEXT: s_branch .LBB15_3
; SI-NEXT: .LBB15_2: ; %latch
; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_add_i32 s6, s6, 1
; SI-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
; SI-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; SI-NEXT: v_mov_b32_e32 v2, s6
; SI-NEXT: s_andn2_b64 exec, exec, s[0:1]
; SI-NEXT: s_cbranch_execz .LBB15_6
; SI-NEXT: .LBB15_3: ; %hdr
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; SI-NEXT: s_cbranch_execz .LBB15_2
; SI-NEXT: ; %bb.4: ; %kill
; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; SI-NEXT: s_cbranch_scc0 .LBB15_8
; SI-NEXT: ; %bb.5: ; %kill
; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_branch .LBB15_2
; SI-NEXT: .LBB15_6: ; %Flow
; SI-NEXT: s_or_b64 exec, exec, s[0:1]
; SI-NEXT: .LBB15_7: ; %._crit_edge
; SI-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB15_8:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: complex_loop:
; GFX10-WAVE64: ; %bb.0: ; %.entry
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, -1
; GFX10-WAVE64-NEXT: s_cmp_lt_i32 s0, 1
; GFX10-WAVE64-NEXT: s_cbranch_scc1 .LBB15_7
; GFX10-WAVE64-NEXT: ; %bb.1: ; %.lr.ph
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: s_mov_b32 s6, 0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], 0
; GFX10-WAVE64-NEXT: s_branch .LBB15_3
; GFX10-WAVE64-NEXT: .LBB15_2: ; %latch
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX10-WAVE64-NEXT: s_add_i32 s6, s6, 1
; GFX10-WAVE64-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, s6
; GFX10-WAVE64-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB15_6
; GFX10-WAVE64-NEXT: .LBB15_3: ; %hdr
; GFX10-WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-WAVE64-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB15_2
; GFX10-WAVE64-NEXT: ; %bb.4: ; %kill
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB15_8
; GFX10-WAVE64-NEXT: ; %bb.5: ; %kill
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: s_branch .LBB15_2
; GFX10-WAVE64-NEXT: .LBB15_6: ; %Flow
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX10-WAVE64-NEXT: .LBB15_7: ; %._crit_edge
; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB15_8:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: complex_loop:
; GFX10-WAVE32: ; %bb.0: ; %.entry
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, -1
; GFX10-WAVE32-NEXT: s_cmp_lt_i32 s0, 1
; GFX10-WAVE32-NEXT: s_cbranch_scc1 .LBB15_7
; GFX10-WAVE32-NEXT: ; %bb.1: ; %.lr.ph
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT: s_mov_b32 s0, 0
; GFX10-WAVE32-NEXT: s_mov_b32 s2, 0
; GFX10-WAVE32-NEXT: s_branch .LBB15_3
; GFX10-WAVE32-NEXT: .LBB15_2: ; %latch
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GFX10-WAVE32-NEXT: s_add_i32 s2, s2, 1
; GFX10-WAVE32-NEXT: v_cmp_ge_i32_e32 vcc_lo, s2, v1
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, s2
; GFX10-WAVE32-NEXT: s_or_b32 s0, vcc_lo, s0
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB15_6
; GFX10-WAVE32-NEXT: .LBB15_3: ; %hdr
; GFX10-WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-WAVE32-NEXT: v_cmp_gt_u32_e32 vcc_lo, s2, v0
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX10-WAVE32-NEXT: s_xor_b32 s3, exec_lo, s3
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB15_2
; GFX10-WAVE32-NEXT: ; %bb.4: ; %kill
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, exec_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB15_8
; GFX10-WAVE32-NEXT: ; %bb.5: ; %kill
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: s_branch .LBB15_2
; GFX10-WAVE32-NEXT: .LBB15_6: ; %Flow
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX10-WAVE32-NEXT: .LBB15_7: ; %._crit_edge
; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB15_8:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
.entry:
  %flaga = icmp sgt i32 %cmpa, 0
  br i1 %flaga, label %.lr.ph, label %._crit_edge

.lr.ph:
  br label %hdr

hdr:
  ; Loop header: %ctr starts at 0 and is advanced in %latch.
  %ctr = phi i32 [ 0, %.lr.ph ], [ %ctr.next, %latch ]
  %flagb = icmp ugt i32 %ctr, %cmpb
  br i1 %flagb, label %kill, label %latch

kill:
  call void @llvm.amdgcn.kill(i1 false)
  br label %latch

latch:
  %ctr.next = add nuw nsw i32 %ctr, 1
  %flagc = icmp slt i32 %ctr.next, %cmpc
  br i1 %flagc, label %hdr, label %._crit_edge

._crit_edge:
  ; -1 arrives from %.entry when the loop body never ran.
  %tmp = phi i32 [ -1, %.entry ], [ %ctr.next, %latch ]
  %out = bitcast i32 %tmp to float
  call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %out, float %out, float undef, float undef, i1 immarg true, i1 immarg true)
  ret void
}
|
|
|
|
; The conditionally executed block %bb.0 contains nothing but a hardware
; register write through llvm.amdgcn.s.setreg. The generated assertions expect
; the s_cbranch_execz around the resulting s_setreg_imm32_b32 to be present
; for every run line. This function contains no kill and uses the default
; calling convention (it returns with s_setpc_b64).
define void @skip_mode_switch(i32 %arg) {
; SI-LABEL: skip_mode_switch:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT: s_cbranch_execz .LBB16_2
; SI-NEXT: ; %bb.1: ; %bb.0
; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; SI-NEXT: .LBB16_2: ; %bb.1
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-WAVE64-LABEL: skip_mode_switch:
; GFX10-WAVE64: ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB16_2
; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb.0
; GFX10-WAVE64-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; GFX10-WAVE64-NEXT: .LBB16_2: ; %bb.1
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX10-WAVE64-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-WAVE32-LABEL: skip_mode_switch:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB16_2
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb.0
; GFX10-WAVE32-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; GFX10-WAVE32-NEXT: .LBB16_2: ; %bb.1
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-WAVE32-NEXT: s_setpc_b64 s[30:31]
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb.0, label %bb.1

bb.0:
  ; Operand 2049 is what the assertions print as hwreg(HW_REG_MODE, 0, 2).
  call void @llvm.amdgcn.s.setreg(i32 2049, i32 3)
  br label %bb.1

bb.1:
  ret void
}
|
|
|
|
; Intrinsics called by the test functions in this file.
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3
declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare void @llvm.amdgcn.kill(i1) #0

declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)

; Attribute groups referenced by the `#N` suffixes on the definitions,
; declarations and call sites.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { inaccessiblememonly nounwind writeonly }
|