Files
clang-p2996/llvm/test/CodeGen/AMDGPU/update-phi.ll
Connor Abbott ce06d50756 AMDGPU: Fix AMDGPUUnifyDivergentExitNodes with no normal returns
Summary:
The code assumed in a few places that if the function had only one exit,
that exit was a normal return, which is invalid. It could be an infinite
loop, in which case we still need to insert the usual fake edge so that
the null export happens. This fixes shaders that end with an infinite
loop that discards.

Reviewers: arsenm, nhaehnle, critson

Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71192
2020-01-30 10:55:02 +01:00

41 lines
1.6 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify %s | FileCheck -check-prefix=IR %s
; Make sure that the phi in n28 is updated when the block is split by unify
; divergent exit nodes.
; Test input: an amdgpu_ps function whose reachable control flow never
; returns. .entry falls into .loopexit, .loopexit falls into n28, and both
; successors of n28's conditional branch (.loopexit and n28 itself) stay
; inside the loop. The only `ret` is in n31, which has no predecessors.
;
; The autogenerated assertions below expect the pass to:
;   * end n28 with `br i1 true` whose true target is a TransitionBlock and
;     whose false target is UnifiedReturnBlock;
;   * place the original conditional branch on %n30 in TransitionBlock;
;   * rewrite the phi's back-edge incoming block from n28 to TransitionBlock;
;   * give UnifiedReturnBlock a call to llvm.amdgcn.exp.f32 followed by
;     `ret void`.
define amdgpu_ps void @_amdgpu_ps_main() local_unnamed_addr #3 {
; IR-LABEL: @_amdgpu_ps_main(
; IR-NEXT: .entry:
; IR-NEXT: br label [[DOTLOOPEXIT:%.*]]
; IR: .loopexit:
; IR-NEXT: br label [[N28:%.*]]
; IR: n28:
; IR-NEXT: [[DOT01:%.*]] = phi float [ 0.000000e+00, [[DOTLOOPEXIT]] ], [ [[N29:%.*]], [[TRANSITIONBLOCK:%.*]] ]
; IR-NEXT: [[N29]] = fadd float [[DOT01]], 1.000000e+00
; IR-NEXT: [[N30:%.*]] = fcmp ogt float [[N29]], 4.000000e+00
; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK]], label [[UNIFIEDRETURNBLOCK:%.*]]
; IR: TransitionBlock:
; IR-NEXT: br i1 [[N30]], label [[DOTLOOPEXIT]], label [[N28]]
; IR: n31:
; IR-NEXT: ret void
; IR: UnifiedReturnBlock:
; IR-NEXT: call void @llvm.amdgcn.exp.f32(i32 9, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 true)
; IR-NEXT: ret void
;
.entry:
br label %.loopexit
; Outer loop header: re-entered from n28 whenever %n30 is true.
.loopexit: ; preds = %n28, %.entry
br label %n28
; Inner self-loop. %.01 is 0.0 when arriving from .loopexit and otherwise the
; previous iteration's %n29; this phi is the one the test is about.
n28: ; preds = %.loopexit, %n28
%.01 = phi float [ 0.000000e+00, %.loopexit ], [ %n29, %n28 ]
%n29 = fadd float %.01, 1.0
%n30 = fcmp ogt float %n29, 4.000000e+00
; Neither successor leaves the loop nest, so n31 is never reached from here.
br i1 %n30, label %.loopexit, label %n28
; No predecessors: the function's only return is unreachable from .entry.
n31: ; preds =
ret void
}