Summary: In a few places the code assumed that a function with only one exit must leave through a normal return, which is invalid. The single exit could instead be an infinite loop, in which case we still need to insert the usual fake edge so that the null export happens. This fixes shaders that end with an infinite loop that discards. Reviewers: arsenm, nhaehnle, critson Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D71192
41 lines
1.6 KiB
LLVM
41 lines
1.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify %s | FileCheck -check-prefix=IR %s
|
|
|
|
; Make sure that the phi in n28 is updated when the block is split by unify
|
|
; divergent exit nodes.
|
|
|
|
; @_amdgpu_ps_main is the test input: a void function with no arguments that
; uses the amdgpu_ps calling convention.
;
; Control flow as written in the body below:
;   .entry -> .loopexit -> n28
;   n28    -> .loopexit (when %n30 is true) or back to n28 (when it is false)
; No branch in the input reaches a return. The only `ret void` sits in n31,
; which lists no predecessors.
;
; The assertions that follow expect the pass output to contain two blocks that
; are not in the input (TransitionBlock and UnifiedReturnBlock), and expect the
; phi in n28 to receive %n29 from TransitionBlock rather than from n28 itself.
;
; NOTE(review): attribute group #3 is referenced on the define line, but no
; definition of it is visible in this file - confirm that is intentional.
define amdgpu_ps void @_amdgpu_ps_main() local_unnamed_addr #3 {
|
|
; IR-LABEL: @_amdgpu_ps_main(
|
|
; IR-NEXT: .entry:
|
|
; IR-NEXT: br label [[DOTLOOPEXIT:%.*]]
|
|
; IR: .loopexit:
|
|
; IR-NEXT: br label [[N28:%.*]]
|
|
; IR: n28:
|
|
; Expected phi: the second incoming block is TransitionBlock, not n28.
; IR-NEXT: [[DOT01:%.*]] = phi float [ 0.000000e+00, [[DOTLOOPEXIT]] ], [ [[N29:%.*]], [[TRANSITIONBLOCK:%.*]] ]
|
|
; IR-NEXT: [[N29]] = fadd float [[DOT01]], 1.000000e+00
|
|
; IR-NEXT: [[N30:%.*]] = fcmp ogt float [[N29]], 4.000000e+00
|
|
; Expected terminator of n28: a branch on constant true whose false successor
; is UnifiedReturnBlock; the original condition moves to TransitionBlock.
; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK]], label [[UNIFIEDRETURNBLOCK:%.*]]
|
|
; IR: TransitionBlock:
|
|
; IR-NEXT: br i1 [[N30]], label [[DOTLOOPEXIT]], label [[N28]]
|
|
; IR: n31:
|
|
; IR-NEXT: ret void
|
|
; IR: UnifiedReturnBlock:
|
|
; Expected contents of the new return block: one llvm.amdgcn.exp.f32 call with
; undef float operands, followed by ret void.
; IR-NEXT: call void @llvm.amdgcn.exp.f32(i32 9, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 true)
|
|
; IR-NEXT: ret void
|
|
;
|
|
.entry:
|
|
br label %.loopexit
|
|
|
|
.loopexit: ; preds = %n28, %.entry
|
|
br label %n28
|
|
|
|
n28: ; preds = %.loopexit, %n28
|
|
; Loop-carried value: 0.0 when entered from .loopexit, %n29 when n28 branches
; back to itself. This is the phi the test is about.
%.01 = phi float [ 0.000000e+00, %.loopexit ], [ %n29, %n28 ]
|
|
%n29 = fadd float %.01, 1.0
|
|
%n30 = fcmp ogt float %n29, 4.000000e+00
|
|
; Both successors stay inside the loop nest: true -> .loopexit, false -> n28.
br i1 %n30, label %.loopexit, label %n28
|
|
|
|
; n31 lists no predecessors; it holds the only ret in the input function.
n31: ; preds =
|
|
ret void
|
|
}
|