This is a generic combine enabled via target hook to reduce icmp logic as discussed in: https://bugs.llvm.org/show_bug.cgi?id=32401 It's likely that other targets will want to enable this hook for scalar transforms, and there are probably other patterns that can use bitwise logic to reduce comparisons. Note that we are missing an IR canonicalization for these patterns, and we will probably prefer the pair-of-compares form in IR (shorter, more likely to fold). Differential Revision: https://reviews.llvm.org/D31483 llvm-svn: 299542
479 lines
12 KiB
LLVM
479 lines
12 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown | FileCheck %s
|
|
|
|
; (P == 0) & (Q == 0): both inputs are zero exactly when (P | Q) is zero.
; The expected output has one `or` feeding a single zero test instead of two
; separate compares.
define zeroext i1 @all_bits_clear(i32 %P, i32 %Q) {
; CHECK-LABEL: all_bits_clear:
; CHECK: # BB#0:
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
; CHECK-NEXT: blr
  %p_zero = icmp eq i32 %P, 0
  %q_zero = icmp eq i32 %Q, 0
  %both = and i1 %p_zero, %q_zero
  ret i1 %both
}
|
|
|
|
; (P > -1) & (Q > -1): both sign bits are clear exactly when the sign bit of
; (P | Q) is clear. The expected output ORs the inputs, inverts the result and
; shifts the sign bit down.
define zeroext i1 @all_sign_bits_clear(i32 %P, i32 %Q) {
; CHECK-LABEL: all_sign_bits_clear:
; CHECK: # BB#0:
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
  %p_nonneg = icmp sgt i32 %P, -1
  %q_nonneg = icmp sgt i32 %Q, -1
  %both = and i1 %p_nonneg, %q_nonneg
  ret i1 %both
}
|
|
|
|
; (P == -1) & (Q == -1): both inputs are all-ones exactly when (P & Q) is
; all-ones. The expected output ANDs the inputs and does one compare against
; -1.
define zeroext i1 @all_bits_set(i32 %P, i32 %Q) {
; CHECK-LABEL: all_bits_set:
; CHECK: # BB#0:
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: li 5, 0
; CHECK-NEXT: li 12, 1
; CHECK-NEXT: cmpwi 0, 3, -1
; CHECK-NEXT: isel 3, 12, 5, 2
; CHECK-NEXT: blr
  %p_ones = icmp eq i32 %P, -1
  %q_ones = icmp eq i32 %Q, -1
  %both = and i1 %p_ones, %q_ones
  ret i1 %both
}
|
|
|
|
; (P < 0) & (Q < 0): both sign bits are set exactly when the sign bit of
; (P & Q) is set. The expected output ANDs the inputs and shifts the sign bit
; down.
define zeroext i1 @all_sign_bits_set(i32 %P, i32 %Q) {
; CHECK-LABEL: all_sign_bits_set:
; CHECK: # BB#0:
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
  %p_neg = icmp slt i32 %P, 0
  %q_neg = icmp slt i32 %Q, 0
  %both = and i1 %p_neg, %q_neg
  ret i1 %both
}
|
|
|
|
; (P != 0) | (Q != 0): some bit is set in either input exactly when (P | Q)
; is non-zero. The expected output ORs the inputs and performs one inverted
; zero test.
define zeroext i1 @any_bits_set(i32 %P, i32 %Q) {
; CHECK-LABEL: any_bits_set:
; CHECK: # BB#0:
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
; CHECK-NEXT: blr
  %p_nonzero = icmp ne i32 %P, 0
  %q_nonzero = icmp ne i32 %Q, 0
  %either = or i1 %p_nonzero, %q_nonzero
  ret i1 %either
}
|
|
|
|
; (P < 0) | (Q < 0): at least one sign bit is set exactly when the sign bit
; of (P | Q) is set. The expected output ORs the inputs and shifts the sign
; bit down.
define zeroext i1 @any_sign_bits_set(i32 %P, i32 %Q) {
; CHECK-LABEL: any_sign_bits_set:
; CHECK: # BB#0:
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
  %p_neg = icmp slt i32 %P, 0
  %q_neg = icmp slt i32 %Q, 0
  %either = or i1 %p_neg, %q_neg
  ret i1 %either
}
|
|
|
|
; (P != -1) | (Q != -1): some bit is clear in either input exactly when
; (P & Q) is not all-ones. The expected output ANDs the inputs and does one
; compare against -1.
define zeroext i1 @any_bits_clear(i32 %P, i32 %Q) {
; CHECK-LABEL: any_bits_clear:
; CHECK: # BB#0:
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: li 5, 1
; CHECK-NEXT: cmpwi 0, 3, -1
; CHECK-NEXT: isel 3, 0, 5, 2
; CHECK-NEXT: blr
  %p_not_ones = icmp ne i32 %P, -1
  %q_not_ones = icmp ne i32 %Q, -1
  %either = or i1 %p_not_ones, %q_not_ones
  ret i1 %either
}
|
|
|
|
; (P > -1) | (Q > -1): at least one sign bit is clear exactly when the sign
; bit of (P & Q) is clear. The expected output ANDs the inputs, inverts the
; result and shifts the sign bit down.
define zeroext i1 @any_sign_bits_clear(i32 %P, i32 %Q) {
; CHECK-LABEL: any_sign_bits_clear:
; CHECK: # BB#0:
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
  %p_nonneg = icmp sgt i32 %P, -1
  %q_nonneg = icmp sgt i32 %Q, -1
  %either = or i1 %p_nonneg, %q_nonneg
  ret i1 %either
}
|
|
|
|
; PR3351 - (P == 0) & (Q == 0) -> (P|Q) == 0
|
|
; Branch form on pointers: returns 4 when both %P and %Q are null, otherwise
; 192. The expected output combines the two null checks into one
; record-form `or.` whose condition result drives the branch.
define i32 @all_bits_clear_branch(i32* %P, i32* %Q) {
; CHECK-LABEL: all_bits_clear_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: or. 3, 3, 4
; CHECK-NEXT: bne 0, .LBB8_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB8_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
  %p_null = icmp eq i32* %P, null
  %q_null = icmp eq i32* %Q, null
  %both = and i1 %p_null, %q_null
  br i1 %both, label %bb1, label %return

bb1:
  ret i32 4

return:
  ret i32 192
}
|
|
|
|
; Branch form of (P > -1) & (Q > -1): returns 4 when both inputs are
; non-negative, otherwise 192. The expected output ORs the inputs and
; branches on a single signed compare against 0.
define i32 @all_sign_bits_clear_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: all_sign_bits_clear_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, 0
; CHECK-NEXT: blt 0, .LBB9_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB9_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
  %p_nonneg = icmp sgt i32 %P, -1
  %q_nonneg = icmp sgt i32 %Q, -1
  %both = and i1 %p_nonneg, %q_nonneg
  br i1 %both, label %bb1, label %return

bb1:
  ret i32 4

return:
  ret i32 192
}
|
|
|
|
; Branch form of (P == -1) & (Q == -1): returns 4 when both inputs are
; all-ones, otherwise 192. The expected output ANDs the inputs and branches
; on a single compare against -1.
define i32 @all_bits_set_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: all_bits_set_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, -1
; CHECK-NEXT: bne 0, .LBB10_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB10_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
  %p_ones = icmp eq i32 %P, -1
  %q_ones = icmp eq i32 %Q, -1
  %both = and i1 %p_ones, %q_ones
  br i1 %both, label %bb1, label %return

bb1:
  ret i32 4

return:
  ret i32 192
}
|
|
|
|
; Branch form of (P < 0) & (Q < 0): returns 4 when both inputs are negative,
; otherwise 192. The expected output ANDs the inputs and branches on a single
; signed compare against -1.
define i32 @all_sign_bits_set_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: all_sign_bits_set_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, -1
; CHECK-NEXT: bgt 0, .LBB11_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB11_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
  %p_neg = icmp slt i32 %P, 0
  %q_neg = icmp slt i32 %Q, 0
  %both = and i1 %p_neg, %q_neg
  br i1 %both, label %bb1, label %return

bb1:
  ret i32 4

return:
  ret i32 192
}
|
|
|
|
; PR3351 - (P != 0) | (Q != 0) -> (P|Q) != 0
|
|
; Branch form on pointers: returns 4 when at least one of %P and %Q is
; non-null, otherwise 192. The expected output combines the two null checks
; into one record-form `or.` whose condition result drives the branch.
define i32 @any_bits_set_branch(i32* %P, i32* %Q) {
; CHECK-LABEL: any_bits_set_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: or. 3, 3, 4
; CHECK-NEXT: beq 0, .LBB12_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB12_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
  %p_nonnull = icmp ne i32* %P, null
  %q_nonnull = icmp ne i32* %Q, null
  %either = or i1 %p_nonnull, %q_nonnull
  br i1 %either, label %bb1, label %return

bb1:
  ret i32 4

return:
  ret i32 192
}
|
|
|
|
; Branch form of (P < 0) | (Q < 0): returns 4 when at least one input is
; negative, otherwise 192. The expected output ORs the inputs and branches on
; a single signed compare against -1.
define i32 @any_sign_bits_set_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: any_sign_bits_set_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, -1
; CHECK-NEXT: bgt 0, .LBB13_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB13_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
  %p_neg = icmp slt i32 %P, 0
  %q_neg = icmp slt i32 %Q, 0
  %either = or i1 %p_neg, %q_neg
  br i1 %either, label %bb1, label %return

bb1:
  ret i32 4

return:
  ret i32 192
}
|
|
|
|
; Branch form of (P != -1) | (Q != -1): returns 4 when at least one input is
; not all-ones, otherwise 192. The expected output ANDs the inputs and
; branches on a single compare against -1.
define i32 @any_bits_clear_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: any_bits_clear_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, -1
; CHECK-NEXT: beq 0, .LBB14_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB14_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
  %p_not_ones = icmp ne i32 %P, -1
  %q_not_ones = icmp ne i32 %Q, -1
  %either = or i1 %p_not_ones, %q_not_ones
  br i1 %either, label %bb1, label %return

bb1:
  ret i32 4

return:
  ret i32 192
}
|
|
|
|
; Branch form of (P > -1) | (Q > -1): returns 4 when at least one input is
; non-negative, otherwise 192. The expected output ANDs the inputs and
; branches on a single signed compare against 0.
define i32 @any_sign_bits_clear_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: any_sign_bits_clear_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, 0
; CHECK-NEXT: blt 0, .LBB15_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB15_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
  %p_nonneg = icmp sgt i32 %P, -1
  %q_nonneg = icmp sgt i32 %Q, -1
  %either = or i1 %p_nonneg, %q_nonneg
  br i1 %either, label %bb1, label %return

bb1:
  ret i32 4

return:
  ret i32 192
}
|
|
|
|
; Lane-wise (P == 0) & (Q == 0) on <4 x i32>. The expected output ORs the two
; vectors and performs one vector equality compare against a zeroed register.
define <4 x i1> @all_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_bits_clear_vec:
; CHECK: # BB#0:
; CHECK-NEXT: xxlxor 36, 36, 36
; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: vcmpequw 2, 2, 4
; CHECK-NEXT: blr
  %p_zero = icmp eq <4 x i32> %P, zeroinitializer
  %q_zero = icmp eq <4 x i32> %Q, zeroinitializer
  %both = and <4 x i1> %p_zero, %q_zero
  ret <4 x i1> %both
}
|
|
|
|
; Lane-wise (P > -1) & (Q > -1) on <4 x i32>. The expected output ORs the two
; vectors and performs one signed greater-than compare against an all-ones
; splat.
define <4 x i1> @all_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_sign_bits_clear_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisb 4, -1
; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: vcmpgtsw 2, 2, 4
; CHECK-NEXT: blr
  %p_nonneg = icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
  %q_nonneg = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
  %both = and <4 x i1> %p_nonneg, %q_nonneg
  ret <4 x i1> %both
}
|
|
|
|
; Lane-wise (P == -1) & (Q == -1) on <4 x i32>. The expected output ANDs the
; two vectors and performs one vector equality compare against an all-ones
; splat.
define <4 x i1> @all_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_bits_set_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisb 4, -1
; CHECK-NEXT: xxland 34, 34, 35
; CHECK-NEXT: vcmpequw 2, 2, 4
; CHECK-NEXT: blr
  %p_ones = icmp eq <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
  %q_ones = icmp eq <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
  %both = and <4 x i1> %p_ones, %q_ones
  ret <4 x i1> %both
}
|
|
|
|
; Lane-wise (P < 0) & (Q < 0) on <4 x i32>. The expected output ANDs the two
; vectors and performs one signed greater-than compare with a zeroed register
; as the left-hand operand.
define <4 x i1> @all_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_sign_bits_set_vec:
; CHECK: # BB#0:
; CHECK-NEXT: xxlxor 36, 36, 36
; CHECK-NEXT: xxland 34, 34, 35
; CHECK-NEXT: vcmpgtsw 2, 4, 2
; CHECK-NEXT: blr
  %p_neg = icmp slt <4 x i32> %P, zeroinitializer
  %q_neg = icmp slt <4 x i32> %Q, zeroinitializer
  %both = and <4 x i1> %p_neg, %q_neg
  ret <4 x i1> %both
}
|
|
|
|
; Lane-wise (P != 0) | (Q != 0) on <4 x i32>. The expected output ORs the two
; vectors, compares once for equality with a zeroed register, and inverts the
; result.
define <4 x i1> @any_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_bits_set_vec:
; CHECK: # BB#0:
; CHECK-NEXT: xxlxor 36, 36, 36
; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: vcmpequw 2, 2, 4
; CHECK-NEXT: xxlnor 34, 34, 34
; CHECK-NEXT: blr
  %p_nonzero = icmp ne <4 x i32> %P, zeroinitializer
  %q_nonzero = icmp ne <4 x i32> %Q, zeroinitializer
  %either = or <4 x i1> %p_nonzero, %q_nonzero
  ret <4 x i1> %either
}
|
|
|
|
; Lane-wise (P < 0) | (Q < 0) on <4 x i32>. The expected output ORs the two
; vectors and performs one signed greater-than compare with a zeroed register
; as the left-hand operand.
define <4 x i1> @any_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_sign_bits_set_vec:
; CHECK: # BB#0:
; CHECK-NEXT: xxlxor 36, 36, 36
; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: vcmpgtsw 2, 4, 2
; CHECK-NEXT: blr
  %p_neg = icmp slt <4 x i32> %P, zeroinitializer
  %q_neg = icmp slt <4 x i32> %Q, zeroinitializer
  %either = or <4 x i1> %p_neg, %q_neg
  ret <4 x i1> %either
}
|
|
|
|
; Lane-wise (P != -1) | (Q != -1) on <4 x i32>. The expected output ANDs the
; two vectors, compares once for equality with an all-ones splat, and inverts
; the result.
define <4 x i1> @any_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_bits_clear_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisb 4, -1
; CHECK-NEXT: xxland 34, 34, 35
; CHECK-NEXT: vcmpequw 2, 2, 4
; CHECK-NEXT: xxlnor 34, 34, 34
; CHECK-NEXT: blr
  %p_not_ones = icmp ne <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
  %q_not_ones = icmp ne <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
  %either = or <4 x i1> %p_not_ones, %q_not_ones
  ret <4 x i1> %either
}
|
|
|
|
; Lane-wise (P > -1) | (Q > -1) on <4 x i32>. The expected output ANDs the
; two vectors and performs one signed greater-than compare against an
; all-ones splat.
define <4 x i1> @any_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_sign_bits_clear_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisb 4, -1
; CHECK-NEXT: xxland 34, 34, 35
; CHECK-NEXT: vcmpgtsw 2, 2, 4
; CHECK-NEXT: blr
  %p_nonneg = icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
  %q_nonneg = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
  %either = or <4 x i1> %p_nonneg, %q_nonneg
  ret <4 x i1> %either
}
|
|
|
|
; (x != -1) & (x != 0) on a single i64 input. The expected output adds 1 to
; x and performs one unsigned compare against 1 instead of two separate
; compares.
define zeroext i1 @ne_neg1_and_ne_zero(i64 %x) {
; CHECK-LABEL: ne_neg1_and_ne_zero:
; CHECK: # BB#0:
; CHECK-NEXT: addi 3, 3, 1
; CHECK-NEXT: li 4, 0
; CHECK-NEXT: li 12, 1
; CHECK-NEXT: cmpldi 3, 1
; CHECK-NEXT: isel 3, 12, 4, 1
; CHECK-NEXT: blr
  %not_neg1 = icmp ne i64 %x, -1
  %not_zero = icmp ne i64 %x, 0
  %both = and i1 %not_neg1, %not_zero
  ret i1 %both
}
|
|
|
|
; PR32401 - https://bugs.llvm.org/show_bug.cgi?id=32401
|
|
|
|
; (a == b) & (c == d) on zero-extended i16 inputs. The expected output XORs
; each pair, ORs the two differences together and performs a single zero
; test.
define zeroext i1 @and_eq(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) {
; CHECK-LABEL: and_eq:
; CHECK: # BB#0:
; CHECK-NEXT: xor 5, 5, 6
; CHECK-NEXT: xor 3, 3, 4
; CHECK-NEXT: or 3, 3, 5
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
; CHECK-NEXT: blr
  %ab_eq = icmp eq i16 %a, %b
  %cd_eq = icmp eq i16 %c, %d
  %both = and i1 %ab_eq, %cd_eq
  ret i1 %both
}
|
|
|
|
; (a != b) | (c != d) on i32 inputs. The expected output XORs each pair, ORs
; the two differences together and performs a single inverted zero test.
define zeroext i1 @or_ne(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: or_ne:
; CHECK: # BB#0:
; CHECK-NEXT: xor 5, 5, 6
; CHECK-NEXT: xor 3, 3, 4
; CHECK-NEXT: or 3, 3, 5
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
; CHECK-NEXT: blr
  %ab_ne = icmp ne i32 %a, %b
  %cd_ne = icmp ne i32 %c, %d
  %either = or i1 %ab_ne, %cd_ne
  ret i1 %either
}
|
|
|
|
; This should not be transformed because vector compares + bitwise logic are faster.
|
|
|
|
; Lane-wise (a == b) & (c == d) on <4 x i32>. The expected output keeps both
; vector equality compares and joins them with a single vector AND; no
; xor/or rewrite appears in it.
define <4 x i1> @and_eq_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: and_eq_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vcmpequw 2, 2, 3
; CHECK-NEXT: vcmpequw 19, 4, 5
; CHECK-NEXT: xxland 34, 34, 51
; CHECK-NEXT: blr
  %ab_eq = icmp eq <4 x i32> %a, %b
  %cd_eq = icmp eq <4 x i32> %c, %d
  %both = and <4 x i1> %ab_eq, %cd_eq
  ret <4 x i1> %both
}
|
|
|