This patch allows the AMDGPUUnifyDivergenceExitNodes pass to transform a function whose post-dominator tree (PDT) has exactly one root and ends in a branch instruction. Fixes https://github.com/llvm/llvm-project/issues/58861. Reviewed By: ruiling, arsenm. Differential Revision: https://reviews.llvm.org/D139780
49 lines
1.6 KiB
LLVM
49 lines
1.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck %s
|
|
|
|
; Test input: a void function taking two i1 conditions whose control flow
; never reaches a `ret`. Every basic block ends in a `br`:
;   %BB  -> %BB1
;   %BB1 -> %BB3 (when %0 is true) or %BB2
;   %BB2 -> %BB4
;   %BB4 -> %BB3 (when %1 is true) or back to %BB4 (self-loop)
;   %BB3 -> %BB1 (back edge of the outer cycle)
; The expected assembly below (autogenerated; regenerate rather than edit by
; hand) still ends in a %DummyReturnBlock holding `s_setpc_b64 s[30:31]`,
; even though the IR itself contains no return instruction.
define void @nested_inf_loop(i1 %0, i1 %1) {
|
|
; CHECK-LABEL: nested_inf_loop:
|
|
; CHECK-NEXT: %bb.0: ; %BB
|
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; CHECK-NEXT: v_and_b32_e32 v1, 1, v1
|
|
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
|
|
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v1
|
|
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
|
|
; CHECK-NEXT: s_xor_b64 s[6:7], vcc, -1
|
|
; CHECK-NEXT: s_mov_b64 s[8:9], 0
|
|
; CHECK-NEXT: .LBB0_1: ; %BB1
|
|
; CHECK: s_and_b64 s[10:11], exec, s[6:7]
|
|
; CHECK-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9]
|
|
; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9]
|
|
; CHECK-NEXT: s_cbranch_execnz .LBB0_1
|
|
; CHECK-NEXT: %bb.2: ; %BB2
|
|
; CHECK: s_or_b64 exec, exec, s[8:9]
|
|
; CHECK-NEXT: s_mov_b64 s[8:9], 0
|
|
; CHECK-NEXT: .LBB0_3: ; %BB4
|
|
; CHECK: s_and_b64 s[10:11], exec, s[4:5]
|
|
; CHECK-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9]
|
|
; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9]
|
|
; CHECK-NEXT: s_cbranch_execnz .LBB0_3
|
|
; CHECK-NEXT: %bb.4: ; %loop.exit.guard
|
|
; CHECK: s_or_b64 exec, exec, s[8:9]
|
|
; CHECK-NEXT: s_mov_b64 vcc, 0
|
|
; CHECK-NEXT: s_mov_b64 s[8:9], 0
|
|
; CHECK-NEXT: s_branch .LBB0_1
|
|
; CHECK-NEXT: %bb.5: ; %DummyReturnBlock
|
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
|
; Entry block: falls straight through to %BB1.
BB:
|
|
br label %BB1
|
|
|
|
; %BB1: header of the outer cycle (target of %BB3's back edge). Branches on
; %0: to %BB3 when true, otherwise to %BB2.
BB1:
|
|
br i1 %0, label %BB3, label %BB2
|
|
|
|
; %BB2: unconditional jump into %BB4.
BB2:
|
|
br label %BB4
|
|
|
|
; %BB4: branches on %1: to %BB3 when true, otherwise back to itself, forming
; the inner self-loop.
BB4:
|
|
br i1 %1, label %BB3, label %BB4
|
|
|
|
; %BB3: unconditional branch back to %BB1, closing the outer cycle. No block
; in this function contains a `ret`.
BB3:
|
|
br label %BB1
|
|
}
|