Files
clang-p2996/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
Hans Wennborg a45f301f7a Revert r368339 "[MBP] Disable aggressive loop rotate in plain mode"
It caused assertions to fire when building Chromium:

  lib/CodeGen/LiveDebugValues.cpp:331: bool
  {anonymous}::LiveDebugValues::OpenRangesSet::empty() const: Assertion
  `Vars.empty() == VarLocs.empty() && "open ranges are inconsistent"' failed.

See https://crbug.com/992871#c3 for how to reproduce.

> Patch https://reviews.llvm.org/D43256 introduced a more aggressive loop layout optimization which depends on profile information. If profile information is not available, the statically estimated profile information (generated by BranchProbabilityInfo.cpp) is used. If the user program doesn't behave as BranchProbabilityInfo.cpp expects, the layout may be worse.
>
> To be conservative, this patch restores the original layout algorithm in plain mode. But the user can still try the aggressive layout optimization with -force-precise-rotation-cost=true.
>
> Differential Revision: https://reviews.llvm.org/D65673

llvm-svn: 368579
2019-08-12 14:23:13 +00:00

76 lines
1.7 KiB
LLVM

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}negated_cond:
; GCN: BB0_1:
; GCN: v_cmp_eq_u32_e64 [[CC:[^,]+]],
; GCN: BB0_3:
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_cmp
; GCN: s_andn2_b64 vcc, exec, [[CC]]
; GCN: s_cbranch_vccnz BB0_2
; Test input: a kernel with a two-level loop whose inner branch tests a
; condition (%tmp2) that is computed in the outer loop header.
;   %arg1 - pointer to i32 in addrspace(1); it is loaded from in bb1 and used
;           as the base of the store address in bb4.
; There is no ret instruction: bb4 always branches back to bb1 or bb2.
; NOTE(review): the FileCheck directives preceding this function match block
; labels and instruction order in the llc output, so the block structure and
; order here should be kept as-is - confirm before restructuring.
define amdgpu_kernel void @negated_cond(i32 addrspace(1)* %arg1) {
bb:
br label %bb1
; Outer loop header (re-entered from bb4): load *%arg1 and compute the
; condition %tmp2 = (loaded value == 0).
bb1:
%tmp1 = load i32, i32 addrspace(1)* %arg1
%tmp2 = icmp eq i32 %tmp1, 0
br label %bb2
; Inner loop header: %tmp3 is 0 on entry from bb1, otherwise %tmp6 from bb4.
bb2:
%tmp3 = phi i32 [ 0, %bb1 ], [ %tmp6, %bb4 ]
%tmp4 = shl i32 %tmp3, 5
; %tmp2 is not recomputed inside the inner loop; it is only tested here.
br i1 %tmp2, label %bb3, label %bb4
; Reached only when %tmp2 is true.
bb3:
%tmp5 = add i32 %tmp4, 1
br label %bb4
; Join point: %tmp6 is %tmp4 + 1 when arriving via bb3, else %tmp4.
bb4:
%tmp6 = phi i32 [ %tmp5, %bb3 ], [ %tmp4, %bb2 ]
%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i32 %tmp6
store i32 0, i32 addrspace(1)* %gep
%tmp7 = icmp eq i32 %tmp6, 32
; Restart the outer loop at bb1 when %tmp6 == 32, otherwise continue the
; inner loop at bb2.
br i1 %tmp7, label %bb1, label %bb2
}
; GCN-LABEL: {{^}}negated_cond_dominated_blocks:
; GCN: v_cmp_eq_u32_e64 [[CC:[^,]+]],
; GCN: %bb4
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_cmp
; GCN: s_andn2_b64 vcc, exec, [[CC]]
; GCN: s_cbranch_vccnz BB1_1
; Test input: a kernel with a single loop (bb4 -> bb5/bb6 -> bb7 -> bb4) whose
; branch in bb4 tests a condition (%tmp2) computed once before the loop.
;   %arg1 - pointer to i32 in addrspace(1); it is loaded from in bb2 and used
;           as the base of the store address in bb7.
; NOTE(review): the FileCheck directives preceding this function match the
; block name %bb4 and a block label in the llc output, so block names and
; order here should be kept as-is - confirm before restructuring.
define amdgpu_kernel void @negated_cond_dominated_blocks(i32 addrspace(1)* %arg1) {
bb:
br label %bb2
; Loop preheader: load *%arg1 and compute %tmp2 = (loaded value == 0).
bb2:
%tmp1 = load i32, i32 addrspace(1)* %arg1
%tmp2 = icmp eq i32 %tmp1, 0
br label %bb4
; Loop exit, reached from bb7.
bb3:
ret void
; Loop header: %tmp3 is 0 on entry from bb2, otherwise %tmp7 from bb7.
bb4:
%tmp3 = phi i32 [ 0, %bb2 ], [ %tmp7, %bb7 ]
%tmp4 = shl i32 %tmp3, 5
; Both successors are separate blocks that rejoin at bb7.
br i1 %tmp2, label %bb5, label %bb6
; Taken when %tmp2 is true: uses the shifted value %tmp4.
bb5:
%tmp5 = add i32 %tmp4, 1
br label %bb7
; Taken when %tmp2 is false: uses the unshifted value %tmp3.
bb6:
%tmp6 = add i32 %tmp3, 1
br label %bb7
; Join point: store 0 at index %tmp7, then exit when %tmp7 == 32.
bb7:
%tmp7 = phi i32 [ %tmp5, %bb5 ], [ %tmp6, %bb6 ]
%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i32 %tmp7
store i32 0, i32 addrspace(1)* %gep
%tmp8 = icmp eq i32 %tmp7, 32
br i1 %tmp8, label %bb3, label %bb4
}