Files
clang-p2996/llvm/test/CodeGen/X86/setcc-logic.ll
Simon Pilgrim 59fa435ea6 [X86] Canonicalize SGT/UGT compares with constants to use SGE/UGE to reduce the number of EFLAGs reads. (PR48760)
This demonstrates a possible fix for PR48760 - for compares with constants, canonicalize the SGT/UGT condition code to use SGE/UGE which should reduce the number of EFLAGs bits we need to read.

As discussed on PR48760, some EFLAG bits are treated independently which can require additional uops to merge together for certain CMOVcc/SETcc/etc. modes.

I've limited this to cases where the constant increment doesn't result in a larger encoding or additional i64 constant materializations.

Differential Revision: https://reviews.llvm.org/D101074
2021-06-30 18:46:50 +01:00

704 lines
18 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,NOBMI
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,BMI
; Scalar setcc: (P == 0) & (Q == 0).
; The expected code merges both compares into one OR of the operands followed
; by a single SETE on the resulting zero flag.
define zeroext i1 @all_bits_clear(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: all_bits_clear:
; CHECK: # %bb.0:
; CHECK-NEXT: orl %esi, %edi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%a = icmp eq i32 %P, 0
%b = icmp eq i32 %Q, 0
%c = and i1 %a, %b
ret i1 %c
}
; Scalar setcc: (P > -1) & (Q > -1), i.e. both sign bits are clear.
; The expected code ORs the operands and reads only the sign flag (SETNS).
define zeroext i1 @all_sign_bits_clear(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: all_sign_bits_clear:
; CHECK: # %bb.0:
; CHECK-NEXT: orl %esi, %edi
; CHECK-NEXT: setns %al
; CHECK-NEXT: retq
%a = icmp sgt i32 %P, -1
%b = icmp sgt i32 %Q, -1
%c = and i1 %a, %b
ret i1 %c
}
; Scalar setcc: (P == -1) & (Q == -1).
; The expected code ANDs the operands and does one compare against -1.
define zeroext i1 @all_bits_set(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: all_bits_set:
; CHECK: # %bb.0:
; CHECK-NEXT: andl %esi, %edi
; CHECK-NEXT: cmpl $-1, %edi
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%a = icmp eq i32 %P, -1
%b = icmp eq i32 %Q, -1
%c = and i1 %a, %b
ret i1 %c
}
; Scalar setcc: (P < 0) & (Q < 0), i.e. both sign bits are set.
; The expected code ANDs the operands and shifts the sign bit down to bit 0
; (shrl $31) instead of materializing the result with a SETcc.
define zeroext i1 @all_sign_bits_set(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: all_sign_bits_set:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: andl %esi, %eax
; CHECK-NEXT: shrl $31, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%a = icmp slt i32 %P, 0
%b = icmp slt i32 %Q, 0
%c = and i1 %a, %b
ret i1 %c
}
; Scalar setcc: (P != 0) | (Q != 0).
; The expected code ORs the operands and emits a single SETNE.
define zeroext i1 @any_bits_set(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: any_bits_set:
; CHECK: # %bb.0:
; CHECK-NEXT: orl %esi, %edi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%a = icmp ne i32 %P, 0
%b = icmp ne i32 %Q, 0
%c = or i1 %a, %b
ret i1 %c
}
; Scalar setcc: (P < 0) | (Q < 0), i.e. at least one sign bit is set.
; The expected code ORs the operands and shifts the sign bit down to bit 0.
define zeroext i1 @any_sign_bits_set(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: any_sign_bits_set:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: orl %esi, %eax
; CHECK-NEXT: shrl $31, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%a = icmp slt i32 %P, 0
%b = icmp slt i32 %Q, 0
%c = or i1 %a, %b
ret i1 %c
}
; Scalar setcc: (P != -1) | (Q != -1).
; The expected code ANDs the operands and does one compare against -1 (SETNE).
define zeroext i1 @any_bits_clear(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: any_bits_clear:
; CHECK: # %bb.0:
; CHECK-NEXT: andl %esi, %edi
; CHECK-NEXT: cmpl $-1, %edi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%a = icmp ne i32 %P, -1
%b = icmp ne i32 %Q, -1
%c = or i1 %a, %b
ret i1 %c
}
; Scalar setcc: (P > -1) | (Q > -1), i.e. at least one sign bit is clear.
; The expected code uses TEST (an AND that only sets flags) and reads the sign
; flag with SETNS.
define zeroext i1 @any_sign_bits_clear(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: any_sign_bits_clear:
; CHECK: # %bb.0:
; CHECK-NEXT: testl %esi, %edi
; CHECK-NEXT: setns %al
; CHECK-NEXT: retq
%a = icmp sgt i32 %P, -1
%b = icmp sgt i32 %Q, -1
%c = or i1 %a, %b
ret i1 %c
}
; PR3351 - (P == 0) & (Q == 0) -> (P|Q) == 0
; Branch variant on two pointer null checks. The expected code merges them into
; one 64-bit OR followed by a single conditional jump.
define i32 @all_bits_clear_branch(i32* %P, i32* %Q) nounwind {
; CHECK-LABEL: all_bits_clear_branch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: orq %rsi, %rdi
; CHECK-NEXT: jne .LBB8_2
; CHECK-NEXT: # %bb.1: # %bb1
; CHECK-NEXT: movl $4, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB8_2: # %return
; CHECK-NEXT: movl $192, %eax
; CHECK-NEXT: retq
entry:
%a = icmp eq i32* %P, null
%b = icmp eq i32* %Q, null
%c = and i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
; Branch variant of (P > -1) & (Q > -1).
; Unlike the setcc form, the expected code keeps two separate test-and-branch
; pairs (TEST + JS for each operand) rather than one merged compare.
define i32 @all_sign_bits_clear_branch(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: all_sign_bits_clear_branch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: js .LBB9_3
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: js .LBB9_3
; CHECK-NEXT: # %bb.2: # %bb1
; CHECK-NEXT: movl $4, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB9_3: # %return
; CHECK-NEXT: movl $192, %eax
; CHECK-NEXT: retq
entry:
%a = icmp sgt i32 %P, -1
%b = icmp sgt i32 %Q, -1
%c = and i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
; Branch variant of (P == -1) & (Q == -1).
; The expected code compares each operand against -1 separately and branches to
; %return as soon as one of them differs.
define i32 @all_bits_set_branch(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: all_bits_set_branch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl $-1, %edi
; CHECK-NEXT: jne .LBB10_3
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: cmpl $-1, %esi
; CHECK-NEXT: jne .LBB10_3
; CHECK-NEXT: # %bb.2: # %bb1
; CHECK-NEXT: movl $4, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB10_3: # %return
; CHECK-NEXT: movl $192, %eax
; CHECK-NEXT: retq
entry:
%a = icmp eq i32 %P, -1
%b = icmp eq i32 %Q, -1
%c = and i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
; Branch variant of (P < 0) & (Q < 0).
; The expected code tests each operand's sign separately (TEST + JNS) and
; leaves for %return when either value is non-negative.
define i32 @all_sign_bits_set_branch(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: all_sign_bits_set_branch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: jns .LBB11_3
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: jns .LBB11_3
; CHECK-NEXT: # %bb.2: # %bb1
; CHECK-NEXT: movl $4, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB11_3: # %return
; CHECK-NEXT: movl $192, %eax
; CHECK-NEXT: retq
entry:
%a = icmp slt i32 %P, 0
%b = icmp slt i32 %Q, 0
%c = and i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
; PR3351 - (P != 0) | (Q != 0) -> (P|Q) != 0
; Branch variant on two pointer non-null checks. The expected code merges them
; into one 64-bit OR followed by a single conditional jump.
define i32 @any_bits_set_branch(i32* %P, i32* %Q) nounwind {
; CHECK-LABEL: any_bits_set_branch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: orq %rsi, %rdi
; CHECK-NEXT: je .LBB12_2
; CHECK-NEXT: # %bb.1: # %bb1
; CHECK-NEXT: movl $4, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB12_2: # %return
; CHECK-NEXT: movl $192, %eax
; CHECK-NEXT: retq
entry:
%a = icmp ne i32* %P, null
%b = icmp ne i32* %Q, null
%c = or i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
; Branch variant of (P < 0) | (Q < 0).
; The expected code tests each operand's sign separately (TEST + JS) and jumps
; to %bb1 as soon as one value is negative; %return is the fall-through.
define i32 @any_sign_bits_set_branch(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: any_sign_bits_set_branch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: js .LBB13_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: js .LBB13_2
; CHECK-NEXT: # %bb.3: # %return
; CHECK-NEXT: movl $192, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB13_2: # %bb1
; CHECK-NEXT: movl $4, %eax
; CHECK-NEXT: retq
entry:
%a = icmp slt i32 %P, 0
%b = icmp slt i32 %Q, 0
%c = or i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
; Branch variant of (P != -1) | (Q != -1).
; The expected code compares each operand against -1 separately and jumps to
; %bb1 as soon as one of them differs; %return is the fall-through.
define i32 @any_bits_clear_branch(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: any_bits_clear_branch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl $-1, %edi
; CHECK-NEXT: jne .LBB14_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: cmpl $-1, %esi
; CHECK-NEXT: jne .LBB14_2
; CHECK-NEXT: # %bb.3: # %return
; CHECK-NEXT: movl $192, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB14_2: # %bb1
; CHECK-NEXT: movl $4, %eax
; CHECK-NEXT: retq
entry:
%a = icmp ne i32 %P, -1
%b = icmp ne i32 %Q, -1
%c = or i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
; Branch variant of (P > -1) | (Q > -1).
; The expected code tests each operand's sign separately (TEST + JNS) and jumps
; to %bb1 as soon as one value is non-negative; %return is the fall-through.
define i32 @any_sign_bits_clear_branch(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: any_sign_bits_clear_branch:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: jns .LBB15_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: jns .LBB15_2
; CHECK-NEXT: # %bb.3: # %return
; CHECK-NEXT: movl $192, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB15_2: # %bb1
; CHECK-NEXT: movl $4, %eax
; CHECK-NEXT: retq
entry:
%a = icmp sgt i32 %P, -1
%b = icmp sgt i32 %Q, -1
%c = or i1 %a, %b
br i1 %c, label %bb1, label %return
bb1:
ret i32 4
return:
ret i32 192
}
; PR44565 - https://bugs.llvm.org/show_bug.cgi?id=44565
; Both lanes of a <2 x double> "x > 0.0" compare are extracted and AND'ed to
; feed a branch. The expected code does one vector compare, collects the lane
; results with MOVMSKPD and compares the mask against 3 (both lanes true).
define i32 @vec_extract_branch(<2 x double> %x) {
; CHECK-LABEL: vec_extract_branch:
; CHECK: # %bb.0:
; CHECK-NEXT: xorpd %xmm1, %xmm1
; CHECK-NEXT: cmpltpd %xmm0, %xmm1
; CHECK-NEXT: movmskpd %xmm1, %eax
; CHECK-NEXT: cmpb $3, %al
; CHECK-NEXT: jne .LBB16_2
; CHECK-NEXT: # %bb.1: # %true
; CHECK-NEXT: movl $42, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB16_2: # %false
; CHECK-NEXT: movl $88, %eax
; CHECK-NEXT: retq
%t1 = fcmp ogt <2 x double> %x, zeroinitializer
%t2 = extractelement <2 x i1> %t1, i32 0
%t3 = extractelement <2 x i1> %t1, i32 1
%t4 = and i1 %t2, %t3
br i1 %t4, label %true, label %false
true:
ret i32 42
false:
ret i32 88
}
; Vector form of (P == 0) & (Q == 0) on <4 x i32>.
; The expected code ORs the inputs and does one PCMPEQD against a zeroed
; register.
define <4 x i1> @all_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) nounwind {
; CHECK-LABEL: all_bits_clear_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-NEXT: retq
%a = icmp eq <4 x i32> %P, zeroinitializer
%b = icmp eq <4 x i32> %Q, zeroinitializer
%c = and <4 x i1> %a, %b
ret <4 x i1> %c
}
; Vector form of (P > -1) & (Q > -1) on <4 x i32>.
; The expected code ORs the inputs and does one signed PCMPGTD against an
; all-ones register (pcmpeqd of a register with itself).
define <4 x i1> @all_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) nounwind {
; CHECK-LABEL: all_sign_bits_clear_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
; CHECK-NEXT: retq
%a = icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = and <4 x i1> %a, %b
ret <4 x i1> %c
}
; Vector form of (P == -1) & (Q == -1) on <4 x i32>.
; The expected code ANDs the inputs and does one PCMPEQD against an all-ones
; register.
define <4 x i1> @all_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) nounwind {
; CHECK-LABEL: all_bits_set_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: pand %xmm1, %xmm0
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-NEXT: retq
%a = icmp eq <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = icmp eq <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = and <4 x i1> %a, %b
ret <4 x i1> %c
}
; Vector form of (P < 0) & (Q < 0) on <4 x i32>.
; The expected code ANDs the inputs and does one "0 > value" PCMPGTD; the
; result lands in %xmm1 and is copied back to %xmm0 for the return.
define <4 x i1> @all_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) nounwind {
; CHECK-LABEL: all_sign_bits_set_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: pand %xmm1, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%a = icmp slt <4 x i32> %P, zeroinitializer
%b = icmp slt <4 x i32> %Q, zeroinitializer
%c = and <4 x i1> %a, %b
ret <4 x i1> %c
}
; Vector form of (P != 0) | (Q != 0) on <4 x i32>.
; The expected code ORs the inputs, compares equal to zero, and then inverts
; the result by XOR'ing with an all-ones register to get "not equal".
define <4 x i1> @any_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) nounwind {
; CHECK-LABEL: any_bits_set_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retq
%a = icmp ne <4 x i32> %P, zeroinitializer
%b = icmp ne <4 x i32> %Q, zeroinitializer
%c = or <4 x i1> %a, %b
ret <4 x i1> %c
}
; Vector form of (P < 0) | (Q < 0) on <4 x i32>.
; The expected code ORs the inputs and does one "0 > value" PCMPGTD; the
; result lands in %xmm1 and is copied back to %xmm0 for the return.
define <4 x i1> @any_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) nounwind {
; CHECK-LABEL: any_sign_bits_set_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%a = icmp slt <4 x i32> %P, zeroinitializer
%b = icmp slt <4 x i32> %Q, zeroinitializer
%c = or <4 x i1> %a, %b
ret <4 x i1> %c
}
; Vector form of (P != -1) | (Q != -1) on <4 x i32>.
; The expected code ANDs the inputs, compares equal to all-ones, and inverts
; the result by XOR'ing with the same all-ones register.
define <4 x i1> @any_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) nounwind {
; CHECK-LABEL: any_bits_clear_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: pand %xmm1, %xmm0
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retq
%a = icmp ne <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = icmp ne <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = or <4 x i1> %a, %b
ret <4 x i1> %c
}
; Vector form of (P > -1) | (Q > -1) on <4 x i32>.
; The expected code ANDs the inputs and does one signed PCMPGTD against an
; all-ones register.
define <4 x i1> @any_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) nounwind {
; CHECK-LABEL: any_sign_bits_clear_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: pand %xmm1, %xmm0
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
; CHECK-NEXT: retq
%a = icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
%c = or <4 x i1> %a, %b
ret <4 x i1> %c
}
; (x != -1) & (x != 0) on the same value.
; The expected code turns the pair into a range check: increment x, then one
; unsigned compare against 2 read with SETAE.
define zeroext i1 @ne_neg1_and_ne_zero(i64 %x) nounwind {
; CHECK-LABEL: ne_neg1_and_ne_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: incq %rdi
; CHECK-NEXT: cmpq $2, %rdi
; CHECK-NEXT: setae %al
; CHECK-NEXT: retq
%cmp1 = icmp ne i64 %x, -1
%cmp2 = icmp ne i64 %x, 0
%and = and i1 %cmp1, %cmp2
ret i1 %and
}
; PR32401 - https://bugs.llvm.org/show_bug.cgi?id=32401
; (a == b) & (c == d) on i8 values with no shared operand.
; The expected code XORs each pair, ORs the two differences, and emits a single
; SETE instead of two SETcc results combined with AND.
define zeroext i1 @and_eq(i8 %a, i8 %b, i8 %c, i8 %d) nounwind {
; CHECK-LABEL: and_eq:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %esi, %edi
; CHECK-NEXT: xorl %ecx, %edx
; CHECK-NEXT: orb %dl, %dil
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%cmp1 = icmp eq i8 %a, %b
%cmp2 = icmp eq i8 %c, %d
%and = and i1 %cmp1, %cmp2
ret i1 %and
}
; (a != b) | (c != d) on i8 values with no shared operand.
; The expected code XORs each pair, ORs the two differences, and emits a single
; SETNE.
define zeroext i1 @or_ne(i8 %a, i8 %b, i8 %c, i8 %d) nounwind {
; CHECK-LABEL: or_ne:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %esi, %edi
; CHECK-NEXT: xorl %ecx, %edx
; CHECK-NEXT: orb %dl, %dil
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%cmp1 = icmp ne i8 %a, %b
%cmp2 = icmp ne i8 %c, %d
%or = or i1 %cmp1, %cmp2
ret i1 %or
}
; This should not be transformed because vector compares + bitwise logic are faster.
; The expected code therefore keeps two PCMPEQD compares joined by a PAND,
; rather than the xor/or form used for the scalar case.
define <4 x i1> @and_eq_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) nounwind {
; CHECK-LABEL: and_eq_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-NEXT: pcmpeqd %xmm3, %xmm2
; CHECK-NEXT: pand %xmm2, %xmm0
; CHECK-NEXT: retq
%cmp1 = icmp eq <4 x i32> %a, %b
%cmp2 = icmp eq <4 x i32> %c, %d
%and = and <4 x i1> %cmp1, %cmp2
ret <4 x i1> %and
}
; (x == 43) | (x == 45): two equality compares of the same value against
; constants whose difference (2) is a power of two.
; The expected code subtracts the smaller constant and tests the result with
; that single bit masked out (-3 == ~2), so only one SETE is needed.
define i1 @or_icmps_const_1bit_diff(i8 %x) {
; CHECK-LABEL: or_icmps_const_1bit_diff:
; CHECK: # %bb.0:
; CHECK-NEXT: addb $-43, %dil
; CHECK-NEXT: testb $-3, %dil
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%a = icmp eq i8 %x, 43
%b = icmp eq i8 %x, 45
%r = or i1 %a, %b
ret i1 %r
}
; (x != 44) & (x != 60): the inverted form of the fold, on i32. The constants
; differ by 16, a power of two.
; The expected code subtracts 44 and tests the result with that bit masked out
; (-17 == ~16), then reads the answer with SETNE.
define i1 @and_icmps_const_1bit_diff(i32 %x) {
; CHECK-LABEL: and_icmps_const_1bit_diff:
; CHECK: # %bb.0:
; CHECK-NEXT: addl $-44, %edi
; CHECK-NEXT: testl $-17, %edi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%a = icmp ne i32 %x, 44
%b = icmp ne i32 %x, 60
%r = and i1 %a, %b
ret i1 %r
}
; Negative test - extra use prevents optimization
; The first compare (%a) is also zero-extended and stored through %p, so the
; expected code keeps both compares as separate CMP + SETE pairs and combines
; them with an OR.
define i1 @or_icmps_const_1bit_diff_extra_use(i8 %x, i8* %p) {
; CHECK-LABEL: or_icmps_const_1bit_diff_extra_use:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpb $45, %dil
; CHECK-NEXT: sete %cl
; CHECK-NEXT: cmpb $43, %dil
; CHECK-NEXT: sete %al
; CHECK-NEXT: sete (%rsi)
; CHECK-NEXT: orb %cl, %al
; CHECK-NEXT: retq
%a = icmp eq i8 %x, 43
%b = icmp eq i8 %x, 45
%r = or i1 %a, %b
%z = zext i1 %a to i8
store i8 %z, i8* %p
ret i1 %r
}
; Negative test - constant diff is >1 bit
; 92 - 44 == 48 is not a power of two, so the expected code keeps two separate
; CMP + SETNE pairs combined with an AND.
define i1 @and_icmps_const_not1bit_diff(i32 %x) {
; CHECK-LABEL: and_icmps_const_not1bit_diff:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpl $44, %edi
; CHECK-NEXT: setne %cl
; CHECK-NEXT: cmpl $92, %edi
; CHECK-NEXT: setne %al
; CHECK-NEXT: andb %cl, %al
; CHECK-NEXT: retq
%a = icmp ne i32 %x, 44
%b = icmp ne i32 %x, 92
%r = and i1 %a, %b
ret i1 %r
}
; Negative test - wrong comparison
; The second compare uses slt rather than eq, so the predicates do not match
; and the expected code keeps both compares (SETE and SETL) joined by an OR.
; NOTE(review): the function name starts with "and_" but the IR combines the
; compares with `or`; presumably a naming leftover - confirm before renaming.
define i1 @and_icmps_const_1bit_diff_wrong_pred(i32 %x) {
; CHECK-LABEL: and_icmps_const_1bit_diff_wrong_pred:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpl $43, %edi
; CHECK-NEXT: sete %cl
; CHECK-NEXT: cmpl $45, %edi
; CHECK-NEXT: setl %al
; CHECK-NEXT: orb %cl, %al
; CHECK-NEXT: retq
%a = icmp eq i32 %x, 43
%b = icmp slt i32 %x, 45
%r = or i1 %a, %b
ret i1 %r
}
; Negative test - no common operand
; The two compares test different values (%x and %y), so the expected code
; keeps two CMP + SETE pairs joined by an OR.
; NOTE(review): the function name starts with "and_" but the IR combines the
; compares with `or`; presumably a naming leftover - confirm before renaming.
define i1 @and_icmps_const_1bit_diff_common_op(i32 %x, i32 %y) {
; CHECK-LABEL: and_icmps_const_1bit_diff_common_op:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpl $43, %edi
; CHECK-NEXT: sete %cl
; CHECK-NEXT: cmpl $45, %esi
; CHECK-NEXT: sete %al
; CHECK-NEXT: orb %cl, %al
; CHECK-NEXT: retq
%a = icmp eq i32 %x, 43
%b = icmp eq i32 %y, 45
%r = or i1 %a, %b
ret i1 %r
}
; PR44136 - fold cmpeq(or(X,Y),X) --> cmpeq(and(~X,Y),0)
; i64 case: (x | y) == x. Without the bmi feature the expected code is
; NOT + TEST; with it, a single ANDN sets the flags read by SETE.
define i1 @or_cmp_eq_i64(i64 %x, i64 %y) {
; NOBMI-LABEL: or_cmp_eq_i64:
; NOBMI: # %bb.0:
; NOBMI-NEXT: notq %rdi
; NOBMI-NEXT: testq %rsi, %rdi
; NOBMI-NEXT: sete %al
; NOBMI-NEXT: retq
;
; BMI-LABEL: or_cmp_eq_i64:
; BMI: # %bb.0:
; BMI-NEXT: andnq %rsi, %rdi, %rax
; BMI-NEXT: sete %al
; BMI-NEXT: retq
%o = or i64 %x, %y
%c = icmp eq i64 %o, %x
ret i1 %c
}
; i32 "not equal" case of the or/compare fold: (x | y) != y, compared against
; the second OR operand. The expected code inverts %y (NOT, or ANDN when the
; bmi feature is enabled), ANDs it with %x and reads the result with SETNE.
define i1 @or_cmp_ne_i32(i32 %x, i32 %y) {
; NOBMI-LABEL: or_cmp_ne_i32:
; NOBMI: # %bb.0:
; NOBMI-NEXT: notl %esi
; NOBMI-NEXT: testl %edi, %esi
; NOBMI-NEXT: setne %al
; NOBMI-NEXT: retq
;
; BMI-LABEL: or_cmp_ne_i32:
; BMI: # %bb.0:
; BMI-NEXT: andnl %edi, %esi, %eax
; BMI-NEXT: setne %al
; BMI-NEXT: retq
%o = or i32 %x, %y
%c = icmp ne i32 %o, %y
ret i1 %c
}
; i16 case of the or/compare fold with the compare operands commuted:
; x == (x | y). Both arguments are zeroext, and the expected code performs the
; NOT/TEST (or ANDN with the bmi feature) on the 32-bit registers.
define i1 @or_cmp_eq_i16(i16 zeroext %x, i16 zeroext %y) {
; NOBMI-LABEL: or_cmp_eq_i16:
; NOBMI: # %bb.0:
; NOBMI-NEXT: notl %edi
; NOBMI-NEXT: testl %esi, %edi
; NOBMI-NEXT: sete %al
; NOBMI-NEXT: retq
;
; BMI-LABEL: or_cmp_eq_i16:
; BMI: # %bb.0:
; BMI-NEXT: andnl %esi, %edi, %eax
; BMI-NEXT: sete %al
; BMI-NEXT: retq
%o = or i16 %x, %y
%c = icmp eq i16 %x, %o
ret i1 %c
}
; i8 case of the or/compare fold with the compare operands commuted:
; y != (x | y). Both llc invocations share one expected sequence here: an
; 8-bit NOT followed by an 8-bit TEST, with no ANDN form.
define i1 @or_cmp_ne_i8(i8 zeroext %x, i8 zeroext %y) {
; CHECK-LABEL: or_cmp_ne_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: notb %sil
; CHECK-NEXT: testb %dil, %sil
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%o = or i8 %x, %y
%c = icmp ne i8 %y, %o
ret i1 %c
}
; Don't fold vectors.
; <4 x i32> form of (x | y) == x with the result sign-extended to the element
; width. The expected code keeps the plain POR followed by PCMPEQD.
define <4 x i32> @or_cmp_eq_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: or_cmp_eq_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: por %xmm0, %xmm1
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-NEXT: retq
%o = or <4 x i32> %x, %y
%c = icmp eq <4 x i32> %o, %x
%s = sext <4 x i1> %c to <4 x i32>
ret <4 x i32> %s
}
; Vector "not equal" form of the or/compare pattern: (x | y) != x, with the
; result sign-extended. The expected code keeps POR + PCMPEQB and inverts the
; equality mask by XOR'ing with an all-ones register.
; NOTE(review): the name says v4i32 but the IR operates on <16 x i8> (byte
; compare); presumably a copy/paste leftover. Renaming would also require
; updating the label assertion below.
define <16 x i8> @or_cmp_ne_v4i32(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: or_cmp_ne_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: por %xmm0, %xmm1
; CHECK-NEXT: pcmpeqb %xmm1, %xmm0
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: retq
%o = or <16 x i8> %x, %y
%c = icmp ne <16 x i8> %o, %x
%s = sext <16 x i1> %c to <16 x i8>
ret <16 x i8> %s
}