Files
clang-p2996/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
Jay Foad d2e5d3512b [StructurizeCFG] Clean up some boolean not instructions
In some cases StructurizeCFG inserts i1 xor instructions to invert
predicates. Add a quick loop to clean these up afterwards if we can get
away with modifying an existing compare instruction instead.
(StructurizeCFG is generally run late in the pipeline so instcombine
does not clean them up for us.)

Differential Revision: https://reviews.llvm.org/D118623
2022-02-01 09:35:37 +00:00

82 lines
1.9 KiB
LLVM

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}negated_cond:
; GCN: .LBB0_1:
; GCN: v_cmp_eq_u32_e64 [[CC:[^,]+]],
; GCN: .LBB0_3:
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_cmp
; GCN: s_andn2_b64 vcc, exec, [[CC]]
; GCN: s_cbranch_vccnz .LBB0_2
define amdgpu_kernel void @negated_cond(i32 addrspace(1)* %arg1) {
bb:
br label %bb1
bb1:
%tmp1 = load i32, i32 addrspace(1)* %arg1
%tmp2 = icmp eq i32 %tmp1, 0
br label %bb2
bb2:
%tmp3 = phi i32 [ 0, %bb1 ], [ %tmp6, %bb4 ]
%tmp4 = shl i32 %tmp3, 5
br i1 %tmp2, label %bb3, label %bb4
bb3:
%tmp5 = add i32 %tmp4, 1
br label %bb4
bb4:
%tmp6 = phi i32 [ %tmp5, %bb3 ], [ %tmp4, %bb2 ]
%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i32 %tmp6
store i32 0, i32 addrspace(1)* %gep
%tmp7 = icmp eq i32 %tmp6, 32
br i1 %tmp7, label %bb1, label %bb2
}
; GCN-LABEL: {{^}}negated_cond_dominated_blocks:
; GCN: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CC1:[^,]+]], -1, 0
; GCN: s_branch [[BB1:.LBB[0-9]+_[0-9]+]]
; GCN: [[BB0:.LBB[0-9]+_[0-9]+]]
; GCN-NOT: v_cndmask_b32
; GCN-NOT: v_cmp
; GCN: [[BB1]]:
; GCN: s_mov_b64 vcc, [[CC1]]
; GCN: s_cbranch_vccz [[BB2:.LBB[0-9]+_[0-9]+]]
; GCN: s_mov_b64 vcc, exec
; GCN: s_cbranch_execnz [[BB0]]
; GCN: [[BB2]]:
define amdgpu_kernel void @negated_cond_dominated_blocks(i32 addrspace(1)* %arg1) {
bb:
br label %bb2
bb2:
%tmp1 = load i32, i32 addrspace(1)* %arg1
%tmp2 = icmp eq i32 %tmp1, 0
br label %bb4
bb3:
ret void
bb4:
%tmp3 = phi i32 [ 0, %bb2 ], [ %tmp7, %bb7 ]
%tmp4 = shl i32 %tmp3, 5
br i1 %tmp2, label %bb5, label %bb6
bb5:
%tmp5 = add i32 %tmp4, 1
br label %bb7
bb6:
%tmp6 = add i32 %tmp3, 1
br label %bb7
bb7:
%tmp7 = phi i32 [ %tmp5, %bb5 ], [ %tmp6, %bb6 ]
%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i32 %tmp7
store i32 0, i32 addrspace(1)* %gep
%tmp8 = icmp eq i32 %tmp7, 32
br i1 %tmp8, label %bb3, label %bb4
}