Files
clang-p2996/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll
Alexander Timofeev c4d256a590 [AMDGPU] Come back patch for the 'Assign register class for cross block values according to the divergence.'
Detailed description:

    After https://reviews.llvm.org/D59990 submit several issues were discovered.
    Changes in common code were preserved but AMDGPU specific part was reverted to keep the backend working correctly.

    Discovered issues were addressed in the following commits:

    https://reviews.llvm.org/D67662
    https://reviews.llvm.org/D67101
    https://reviews.llvm.org/D63953
    https://reviews.llvm.org/D63731

    This change brings back AMDGPU specific changes.

  Reviewed by: rampitec, arsenm

  Differential Revision: https://reviews.llvm.org/D68635

llvm-svn: 374767
2019-10-14 12:01:10 +00:00

163 lines
4.0 KiB
LLVM

; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; FUNC-LABEL: {{^}}break_inserted_outside_of_loop:
; SI: [[LOOP_LABEL:[A-Z0-9]+]]:
; Lowered break instructin:
; SI: s_or_b64
; Lowered Loop instruction:
; SI: s_andn2_b64
; s_cbranch_execnz [[LOOP_LABEL]]
; SI: s_endpgm
define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) {
main_body:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%0 = and i32 %a, %tid
%1 = trunc i32 %0 to i1
br label %ENDIF
ENDLOOP:
store i32 0, i32 addrspace(1)* %out
ret void
ENDIF:
br i1 %1, label %ENDLOOP, label %ENDIF
}
; FUNC-LABEL: {{^}}phi_cond_outside_loop:
; SI: s_mov_b64 [[LEFT:s\[[0-9]+:[0-9]+\]]], 0
; SI: s_mov_b64 [[PHI:s\[[0-9]+:[0-9]+\]]], 0
; SI: ; %else
; SI: v_cmp_eq_u32_e64 [[TMP:s\[[0-9]+:[0-9]+\]]],
; SI: ; %endif
; SI: [[LOOP_LABEL:BB[0-9]+_[0-9]+]]: ; %loop
; SI: s_mov_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[LEFT]]
; SI: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[PHI]]
; SI: s_or_b64 [[LEFT]], [[TMP1]], [[TMP]]
; SI: s_andn2_b64 exec, exec, [[LEFT]]
; SI: s_cbranch_execnz [[LOOP_LABEL]]
; SI: s_endpgm
define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
entry:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%0 = icmp eq i32 %tid , 0
br i1 %0, label %if, label %else
if:
br label %endif
else:
%1 = icmp eq i32 %b, 0
br label %endif
endif:
%2 = phi i1 [0, %if], [%1, %else]
br label %loop
loop:
br i1 %2, label %exit, label %loop
exit:
ret void
}
; FIXME: should emit s_endpgm
; CHECK-LABEL: {{^}}switch_unreachable:
; CHECK-NOT: s_endpgm
; CHECK: .Lfunc_end2
define amdgpu_kernel void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
centry:
switch i32 %x, label %sw.default [
i32 0, label %sw.bb
i32 60, label %sw.bb
]
sw.bb:
unreachable
sw.default:
unreachable
sw.epilog:
ret void
}
declare float @llvm.fabs.f32(float) nounwind readnone
; This broke the old AMDIL cfg structurizer
; FUNC-LABEL: {{^}}loop_land_info_assert:
; SI: v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}}
; SI: s_and_b64 [[CMP4M:s\[[0-9]+:[0-9]+\]]], exec, [[CMP4]]
; SI: [[WHILELOOP:BB[0-9]+_[0-9]+]]: ; %while.cond
; SI: s_cbranch_vccz [[FOR_COND_PH:BB[0-9]+_[0-9]+]]
; SI: [[CONVEX_EXIT:BB[0-9_]+]]
; SI: s_mov_b64 vcc,
; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]]
; SI: s_cbranch_vccnz [[WHILELOOP]]
; SI: ; %if.else
; SI: buffer_store_dword
; SI: [[FOR_COND_PH]]: ; %for.cond.preheader
; SI: s_cbranch_vccz [[ENDPGM]]
; SI: [[ENDPGM]]:
; SI-NEXT: s_endpgm
define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
entry:
%cmp = icmp sgt i32 %c0, 0
br label %while.cond.outer
while.cond.outer:
%tmp = load float, float addrspace(1)* undef
br label %while.cond
while.cond:
%cmp1 = icmp slt i32 %c1, 4
br i1 %cmp1, label %convex.exit, label %for.cond
convex.exit:
%or = or i1 %cmp, %cmp1
br i1 %or, label %return, label %if.end
if.end:
%tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone
%cmp2 = fcmp olt float %tmp3, 0x3E80000000000000
br i1 %cmp2, label %if.else, label %while.cond.outer
if.else:
store volatile i32 3, i32 addrspace(1)* undef, align 4
br label %while.cond
for.cond:
%cmp3 = icmp slt i32 %c3, 1000
br i1 %cmp3, label %for.body, label %return
for.body:
br i1 %cmp3, label %self.loop, label %if.end.2
if.end.2:
%or.cond2 = or i1 %cmp3, %arg
br i1 %or.cond2, label %return, label %for.cond
self.loop:
br label %self.loop
return:
ret void
}
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
attributes #0 = { nounwind readnone }