Files
clang-p2996/llvm/test/CodeGen/PowerPC/setcc-logic.ll
Sanjay Patel b2f1621bb1 [DAGCombiner] add and use TLI hook to convert and-of-seteq / or-of-setne to bitwise logic+setcc (PR32401)
This is a generic combine enabled via target hook to reduce icmp logic as discussed in:
https://bugs.llvm.org/show_bug.cgi?id=32401

It's likely that other targets will want to enable this hook for scalar transforms, 
and there are probably other patterns that can use bitwise logic to reduce comparisons.

Note that we are missing an IR canonicalization for these patterns, and we will probably
prefer the pair-of-compares form in IR (shorter, more likely to fold).

Differential Revision: https://reviews.llvm.org/D31483

llvm-svn: 299542
2017-04-05 14:09:39 +00:00

479 lines
12 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown | FileCheck %s
; Scalar fold: (P == 0) & (Q == 0).
; Returns true only when both i32 arguments are zero. The expected assembly
; merges the arguments with one "or 3, 3, 4" and computes the result from that
; single value (cntlzw + rlwinm) instead of testing each argument separately.
define zeroext i1 @all_bits_clear(i32 %P, i32 %Q) {
; CHECK-LABEL: all_bits_clear:
; CHECK: # BB#0:
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
; CHECK-NEXT: blr
%a = icmp eq i32 %P, 0
%b = icmp eq i32 %Q, 0
%c = and i1 %a, %b
ret i1 %c
}
; Scalar fold: (P > -1) & (Q > -1), i.e. both i32 arguments are non-negative.
; The expected assembly ORs the arguments once, inverts the result (nor) and
; shifts it right by 31 (srwi) to produce the i1 return value.
define zeroext i1 @all_sign_bits_clear(i32 %P, i32 %Q) {
; CHECK-LABEL: all_sign_bits_clear:
; CHECK: # BB#0:
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
%a = icmp sgt i32 %P, -1
%b = icmp sgt i32 %Q, -1
%c = and i1 %a, %b
ret i1 %c
}
; Scalar fold: (P == -1) & (Q == -1).
; Returns true only when both i32 arguments are all-ones. The expected assembly
; ANDs the arguments once and uses a single compare against -1 (cmpwi); an isel
; then selects between the materialized constants 1 and 0.
define zeroext i1 @all_bits_set(i32 %P, i32 %Q) {
; CHECK-LABEL: all_bits_set:
; CHECK: # BB#0:
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: li 5, 0
; CHECK-NEXT: li 12, 1
; CHECK-NEXT: cmpwi 0, 3, -1
; CHECK-NEXT: isel 3, 12, 5, 2
; CHECK-NEXT: blr
%a = icmp eq i32 %P, -1
%b = icmp eq i32 %Q, -1
%c = and i1 %a, %b
ret i1 %c
}
; Scalar fold: (P < 0) & (Q < 0), i.e. both i32 arguments are negative.
; The expected assembly ANDs the arguments once and shifts the result right by
; 31 (srwi) to produce the i1 return value; no compare instruction is expected.
define zeroext i1 @all_sign_bits_set(i32 %P, i32 %Q) {
; CHECK-LABEL: all_sign_bits_set:
; CHECK: # BB#0:
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
%a = icmp slt i32 %P, 0
%b = icmp slt i32 %Q, 0
%c = and i1 %a, %b
ret i1 %c
}
; Scalar fold: (P != 0) | (Q != 0).
; Returns true when at least one i32 argument is non-zero. The expected assembly
; matches the all_bits_clear sequence (or + cntlzw + rlwinm) with one extra
; "nor" between cntlzw and rlwinm.
define zeroext i1 @any_bits_set(i32 %P, i32 %Q) {
; CHECK-LABEL: any_bits_set:
; CHECK: # BB#0:
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
; CHECK-NEXT: blr
%a = icmp ne i32 %P, 0
%b = icmp ne i32 %Q, 0
%c = or i1 %a, %b
ret i1 %c
}
; Scalar fold: (P < 0) | (Q < 0), i.e. at least one i32 argument is negative.
; The expected assembly ORs the arguments once and shifts the result right by
; 31 (srwi) to produce the i1 return value; no compare instruction is expected.
define zeroext i1 @any_sign_bits_set(i32 %P, i32 %Q) {
; CHECK-LABEL: any_sign_bits_set:
; CHECK: # BB#0:
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
%a = icmp slt i32 %P, 0
%b = icmp slt i32 %Q, 0
%c = or i1 %a, %b
ret i1 %c
}
; Scalar fold: (P != -1) | (Q != -1).
; Returns true when at least one i32 argument is not all-ones. The expected
; assembly ANDs the arguments once, compares the result against -1 a single
; time (cmpwi), and uses isel to pick the i1 result.
define zeroext i1 @any_bits_clear(i32 %P, i32 %Q) {
; CHECK-LABEL: any_bits_clear:
; CHECK: # BB#0:
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: li 5, 1
; CHECK-NEXT: cmpwi 0, 3, -1
; CHECK-NEXT: isel 3, 0, 5, 2
; CHECK-NEXT: blr
%a = icmp ne i32 %P, -1
%b = icmp ne i32 %Q, -1
%c = or i1 %a, %b
ret i1 %c
}
; Scalar fold: (P > -1) | (Q > -1), i.e. at least one i32 argument is
; non-negative. The expected assembly ANDs the arguments once, inverts the
; result (nor) and shifts it right by 31 (srwi) to produce the i1 return value.
define zeroext i1 @any_sign_bits_clear(i32 %P, i32 %Q) {
; CHECK-LABEL: any_sign_bits_clear:
; CHECK: # BB#0:
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
%a = icmp sgt i32 %P, -1
%b = icmp sgt i32 %Q, -1
%c = or i1 %a, %b
ret i1 %c
}
; PR3351 - (P == 0) & (Q == 0) -> (P|Q) == 0
; Branch variant operating on pointers: both %P and %Q are compared against
; null and the combined condition feeds a conditional branch. Returns 4 when
; both pointers are null and 192 otherwise. The expected assembly needs only
; one "or." followed by a single conditional branch (bne).
define i32 @all_bits_clear_branch(i32* %P, i32* %Q) {
; CHECK-LABEL: all_bits_clear_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: or. 3, 3, 4
; CHECK-NEXT: bne 0, .LBB8_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB8_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
%a = icmp eq i32* %P, null
%b = icmp eq i32* %Q, null
%c = and i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
; Branch variant of (P > -1) & (Q > -1): returns 4 when both i32 arguments are
; non-negative and 192 otherwise. The expected assembly ORs the arguments once,
; then uses one compare against 0 (cmpwi) and one conditional branch (blt).
define i32 @all_sign_bits_clear_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: all_sign_bits_clear_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, 0
; CHECK-NEXT: blt 0, .LBB9_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB9_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
%a = icmp sgt i32 %P, -1
%b = icmp sgt i32 %Q, -1
%c = and i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
; Branch variant of (P == -1) & (Q == -1): returns 4 when both i32 arguments
; are all-ones and 192 otherwise. The expected assembly ANDs the arguments
; once, then uses one compare against -1 (cmpwi) and one conditional branch
; (bne).
define i32 @all_bits_set_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: all_bits_set_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, -1
; CHECK-NEXT: bne 0, .LBB10_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB10_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
%a = icmp eq i32 %P, -1
%b = icmp eq i32 %Q, -1
%c = and i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
; Branch variant of (P < 0) & (Q < 0): returns 4 when both i32 arguments are
; negative and 192 otherwise. The expected assembly ANDs the arguments once,
; then uses one compare against -1 (cmpwi) and one conditional branch (bgt).
define i32 @all_sign_bits_set_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: all_sign_bits_set_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, -1
; CHECK-NEXT: bgt 0, .LBB11_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB11_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
%a = icmp slt i32 %P, 0
%b = icmp slt i32 %Q, 0
%c = and i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
; PR3351 - (P != 0) | (Q != 0) -> (P|Q) != 0
; Branch variant operating on pointers: both %P and %Q are compared against
; null and the combined condition feeds a conditional branch. Returns 4 when
; at least one pointer is non-null and 192 otherwise. The expected assembly
; needs only one "or." followed by a single conditional branch (beq).
define i32 @any_bits_set_branch(i32* %P, i32* %Q) {
; CHECK-LABEL: any_bits_set_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: or. 3, 3, 4
; CHECK-NEXT: beq 0, .LBB12_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB12_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
%a = icmp ne i32* %P, null
%b = icmp ne i32* %Q, null
%c = or i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
; Branch variant of (P < 0) | (Q < 0): returns 4 when at least one i32 argument
; is negative and 192 otherwise. The expected assembly ORs the arguments once,
; then uses one compare against -1 (cmpwi) and one conditional branch (bgt).
define i32 @any_sign_bits_set_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: any_sign_bits_set_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, -1
; CHECK-NEXT: bgt 0, .LBB13_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB13_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
%a = icmp slt i32 %P, 0
%b = icmp slt i32 %Q, 0
%c = or i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
; Branch variant of (P != -1) | (Q != -1): returns 4 when at least one i32
; argument is not all-ones and 192 otherwise. The expected assembly ANDs the
; arguments once, then uses one compare against -1 (cmpwi) and one conditional
; branch (beq).
define i32 @any_bits_clear_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: any_bits_clear_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, -1
; CHECK-NEXT: beq 0, .LBB14_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB14_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
%a = icmp ne i32 %P, -1
%b = icmp ne i32 %Q, -1
%c = or i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
; Branch variant of (P > -1) | (Q > -1): returns 4 when at least one i32
; argument is non-negative and 192 otherwise. The expected assembly ANDs the
; arguments once, then uses one compare against 0 (cmpwi) and one conditional
; branch (blt).
define i32 @any_sign_bits_clear_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: any_sign_bits_clear_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, 0
; CHECK-NEXT: blt 0, .LBB15_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB15_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
%a = icmp sgt i32 %P, -1
%b = icmp sgt i32 %Q, -1
%c = or i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
; Vector fold: element-wise (P == 0) & (Q == 0) on <4 x i32>.
; The expected assembly ORs the two input vectors once (xxlor) and performs a
; single vcmpequw against a register cleared with xxlxor.
define <4 x i1> @all_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_bits_clear_vec:
; CHECK: # BB#0:
; CHECK-NEXT: xxlxor 36, 36, 36
; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: vcmpequw 2, 2, 4
; CHECK-NEXT: blr
%a = icmp eq <4 x i32> %P, zeroinitializer
%b = icmp eq <4 x i32> %Q, zeroinitializer
%c = and <4 x i1> %a, %b
ret <4 x i1> %c
}
; Vector fold: element-wise (P > -1) & (Q > -1) on <4 x i32>.
; The expected assembly ORs the two input vectors once (xxlor) and performs a
; single signed greater-than compare (vcmpgtsw) against a splat of -1
; (vspltisb 4, -1).
define <4 x i1> @all_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_sign_bits_clear_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisb 4, -1
; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: vcmpgtsw 2, 2, 4
; CHECK-NEXT: blr
%a = icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = and <4 x i1> %a, %b
ret <4 x i1> %c
}
; Vector fold: element-wise (P == -1) & (Q == -1) on <4 x i32>.
; The expected assembly ANDs the two input vectors once (xxland) and performs a
; single vcmpequw against a splat of -1 (vspltisb 4, -1).
define <4 x i1> @all_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_bits_set_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisb 4, -1
; CHECK-NEXT: xxland 34, 34, 35
; CHECK-NEXT: vcmpequw 2, 2, 4
; CHECK-NEXT: blr
%a = icmp eq <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = icmp eq <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = and <4 x i1> %a, %b
ret <4 x i1> %c
}
; Vector fold: element-wise (P < 0) & (Q < 0) on <4 x i32>.
; The expected assembly ANDs the two input vectors once (xxland) and performs a
; single vcmpgtsw with the xxlxor-cleared register as the first source operand.
define <4 x i1> @all_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_sign_bits_set_vec:
; CHECK: # BB#0:
; CHECK-NEXT: xxlxor 36, 36, 36
; CHECK-NEXT: xxland 34, 34, 35
; CHECK-NEXT: vcmpgtsw 2, 4, 2
; CHECK-NEXT: blr
%a = icmp slt <4 x i32> %P, zeroinitializer
%b = icmp slt <4 x i32> %Q, zeroinitializer
%c = and <4 x i1> %a, %b
ret <4 x i1> %c
}
; Vector fold: element-wise (P != 0) | (Q != 0) on <4 x i32>.
; The expected assembly matches the all_bits_clear_vec sequence (xxlxor, xxlor,
; vcmpequw) followed by one xxlnor applied to the compare result.
define <4 x i1> @any_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_bits_set_vec:
; CHECK: # BB#0:
; CHECK-NEXT: xxlxor 36, 36, 36
; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: vcmpequw 2, 2, 4
; CHECK-NEXT: xxlnor 34, 34, 34
; CHECK-NEXT: blr
%a = icmp ne <4 x i32> %P, zeroinitializer
%b = icmp ne <4 x i32> %Q, zeroinitializer
%c = or <4 x i1> %a, %b
ret <4 x i1> %c
}
; Vector fold: element-wise (P < 0) | (Q < 0) on <4 x i32>.
; The expected assembly ORs the two input vectors once (xxlor) and performs a
; single vcmpgtsw with the xxlxor-cleared register as the first source operand.
define <4 x i1> @any_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_sign_bits_set_vec:
; CHECK: # BB#0:
; CHECK-NEXT: xxlxor 36, 36, 36
; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: vcmpgtsw 2, 4, 2
; CHECK-NEXT: blr
%a = icmp slt <4 x i32> %P, zeroinitializer
%b = icmp slt <4 x i32> %Q, zeroinitializer
%c = or <4 x i1> %a, %b
ret <4 x i1> %c
}
; Vector fold: element-wise (P != -1) | (Q != -1) on <4 x i32>.
; The expected assembly matches the all_bits_set_vec sequence (vspltisb, xxland,
; vcmpequw) followed by one xxlnor applied to the compare result.
define <4 x i1> @any_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_bits_clear_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisb 4, -1
; CHECK-NEXT: xxland 34, 34, 35
; CHECK-NEXT: vcmpequw 2, 2, 4
; CHECK-NEXT: xxlnor 34, 34, 34
; CHECK-NEXT: blr
%a = icmp ne <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = icmp ne <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = or <4 x i1> %a, %b
ret <4 x i1> %c
}
; Vector fold: element-wise (P > -1) | (Q > -1) on <4 x i32>.
; The expected assembly ANDs the two input vectors once (xxland) and performs a
; single signed greater-than compare (vcmpgtsw) against a splat of -1
; (vspltisb 4, -1).
define <4 x i1> @any_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_sign_bits_clear_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisb 4, -1
; CHECK-NEXT: xxland 34, 34, 35
; CHECK-NEXT: vcmpgtsw 2, 2, 4
; CHECK-NEXT: blr
%a = icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = or <4 x i1> %a, %b
ret <4 x i1> %c
}
; Single-operand case: (x != -1) & (x != 0) on an i64 argument.
; The expected assembly replaces the two comparisons with one range test: it
; adds 1 to x (addi), compares the sum against 1 with a logical compare
; (cmpldi), and uses isel to select between the constants 1 and 0.
define zeroext i1 @ne_neg1_and_ne_zero(i64 %x) {
; CHECK-LABEL: ne_neg1_and_ne_zero:
; CHECK: # BB#0:
; CHECK-NEXT: addi 3, 3, 1
; CHECK-NEXT: li 4, 0
; CHECK-NEXT: li 12, 1
; CHECK-NEXT: cmpldi 3, 1
; CHECK-NEXT: isel 3, 12, 4, 1
; CHECK-NEXT: blr
%cmp1 = icmp ne i64 %x, -1
%cmp2 = icmp ne i64 %x, 0
%and = and i1 %cmp1, %cmp2
ret i1 %and
}
; PR32401 - https://bugs.llvm.org/show_bug.cgi?id=32401
; And-of-equality on two independent operand pairs: (a == b) & (c == d), with
; zero-extended i16 arguments. The expected assembly XORs each pair, ORs the
; two XOR results, and derives the i1 result from that single value
; (cntlzw + rlwinm) rather than emitting two separate compares.
define zeroext i1 @and_eq(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) {
; CHECK-LABEL: and_eq:
; CHECK: # BB#0:
; CHECK-NEXT: xor 5, 5, 6
; CHECK-NEXT: xor 3, 3, 4
; CHECK-NEXT: or 3, 3, 5
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
; CHECK-NEXT: blr
%cmp1 = icmp eq i16 %a, %b
%cmp2 = icmp eq i16 %c, %d
%and = and i1 %cmp1, %cmp2
ret i1 %and
}
; Or-of-inequality on two independent operand pairs: (a != b) | (c != d), with
; i32 arguments. The expected assembly XORs each pair, ORs the two XOR results,
; and derives the i1 result from that single value; it matches the and_eq
; sequence with one extra "nor" between cntlzw and rlwinm.
define zeroext i1 @or_ne(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: or_ne:
; CHECK: # BB#0:
; CHECK-NEXT: xor 5, 5, 6
; CHECK-NEXT: xor 3, 3, 4
; CHECK-NEXT: or 3, 3, 5
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
; CHECK-NEXT: blr
%cmp1 = icmp ne i32 %a, %b
%cmp2 = icmp ne i32 %c, %d
%or = or i1 %cmp1, %cmp2
ret i1 %or
}
; This should not be transformed because vector compares + bitwise logic are faster.
; Negative test: element-wise (a == b) & (c == d) on <4 x i32>. The expected
; assembly keeps both vcmpequw instructions and combines their results with a
; single xxland, i.e. no xor/or rewrite of the operands is expected here.
define <4 x i1> @and_eq_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: and_eq_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vcmpequw 2, 2, 3
; CHECK-NEXT: vcmpequw 19, 4, 5
; CHECK-NEXT: xxland 34, 34, 51
; CHECK-NEXT: blr
%cmp1 = icmp eq <4 x i32> %a, %b
%cmp2 = icmp eq <4 x i32> %c, %d
%and = and <4 x i1> %cmp1, %cmp2
ret <4 x i1> %and
}