There was concern that creating bitcasts for the simpler potential select pattern:
define <2 x i64> @vecBitcastOp1(<4 x i1> %cmp, <2 x i64> %a) {
%a2 = add <2 x i64> %a, %a
%sext = sext <4 x i1> %cmp to <4 x i32>
%bc = bitcast <4 x i32> %sext to <2 x i64>
%and = and <2 x i64> %a2, %bc
ret <2 x i64> %and
}
might lead to worse code for some targets, so this patch is matching the larger
patterns seen in the test cases.
The motivating example for this patch is this IR produced via SSE intrinsics in C:
define <2 x i64> @gibson(<2 x i64> %a, <2 x i64> %b) {
%t0 = bitcast <2 x i64> %a to <4 x i32>
%t1 = bitcast <2 x i64> %b to <4 x i32>
%cmp = icmp sgt <4 x i32> %t0, %t1
%sext = sext <4 x i1> %cmp to <4 x i32>
%t2 = bitcast <4 x i32> %sext to <2 x i64>
%and = and <2 x i64> %t2, %a
%neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
%neg2 = bitcast <4 x i32> %neg to <2 x i64>
%and2 = and <2 x i64> %neg2, %b
%or = or <2 x i64> %and, %and2
ret <2 x i64> %or
}
For an AVX target, this is currently:
vpcmpgtd %xmm1, %xmm0, %xmm2
vpand %xmm0, %xmm2, %xmm0
vpandn %xmm1, %xmm2, %xmm1
vpor %xmm1, %xmm0, %xmm0
retq
With this patch, it becomes:
vpmaxsd %xmm1, %xmm0, %xmm0
Differential Revision: http://reviews.llvm.org/D20774
llvm-svn: 271676
156 lines
5.3 KiB
LLVM
156 lines
5.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; Each function below builds a value selection out of and/or/xor with an
; all-ones/all-zeros mask; the assertions expect a single select instead.

; Baseline pattern: the mask is the sign-extended compare result, so
;   (%c & mask) | (%d & ~mask)
; is expected to become: select (%a < %b), %c, %d.
define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @foo(
; CHECK-NEXT:    [[E:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT:    [[J:%.*]] = select i1 [[E]], i32 %c, i32 %d
; CHECK-NEXT:    ret i32 [[J]]
;
  %e = icmp slt i32 %a, %b
  %f = sext i1 %e to i32
  %g = and i32 %c, %f
  %h = xor i32 %f, -1
  %i = and i32 %d, %h
  %j = or i32 %g, %i
  ret i32 %j
}

; Same as @foo, but the operands of the final 'or' are commuted
; (%i, %g instead of %g, %i). The expected select is unchanged.
define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @bar(
; CHECK-NEXT:    [[E:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT:    [[J:%.*]] = select i1 [[E]], i32 %c, i32 %d
; CHECK-NEXT:    ret i32 [[J]]
;
  %e = icmp slt i32 %a, %b
  %f = sext i1 %e to i32
  %g = and i32 %c, %f
  %h = xor i32 %f, -1
  %i = and i32 %d, %h
  %j = or i32 %i, %g
  ret i32 %j
}

; The mask is produced by 'select i1 %t0, i32 -1, i32 0' rather than a sext,
; and its complement by xor with -1. Expected result: select %t0, %c, %d.
define i32 @goo(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @goo(
; CHECK-NEXT:    [[T0:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT:    [[T3:%.*]] = select i1 [[T0]], i32 %c, i32 %d
; CHECK-NEXT:    ret i32 [[T3]]
;
  %t0 = icmp slt i32 %a, %b
  %iftmp.0.0 = select i1 %t0, i32 -1, i32 0
  %t1 = and i32 %iftmp.0.0, %c
  %not = xor i32 %iftmp.0.0, -1
  %t2 = and i32 %not, %d
  %t3 = or i32 %t1, %t2
  ret i32 %t3
}

; Both masks come from selects on the same condition: (-1, 0) for the %c side
; and the swapped constants (0, -1) for the %d side, so no xor is involved.
; Expected result: select %t0, %c, %d.
define i32 @poo(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @poo(
; CHECK-NEXT:    [[T0:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT:    [[T3:%.*]] = select i1 [[T0]], i32 %c, i32 %d
; CHECK-NEXT:    ret i32 [[T3]]
;
  %t0 = icmp slt i32 %a, %b
  %iftmp.0.0 = select i1 %t0, i32 -1, i32 0
  %t1 = and i32 %iftmp.0.0, %c
  %iftmp = select i1 %t0, i32 0, i32 -1
  %t2 = and i32 %iftmp, %d
  %t3 = or i32 %t1, %t2
  ret i32 %t3
}

; Same instruction sequence as @goo (select-built mask, xor-built complement);
; only the mask's value name (%iftmp.1.0) differs.
; Expected result: select %t0, %c, %d.
define i32 @par(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @par(
; CHECK-NEXT:    [[T0:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT:    [[T3:%.*]] = select i1 [[T0]], i32 %c, i32 %d
; CHECK-NEXT:    ret i32 [[T3]]
;
  %t0 = icmp slt i32 %a, %b
  %iftmp.1.0 = select i1 %t0, i32 -1, i32 0
  %t1 = and i32 %iftmp.1.0, %c
  %not = xor i32 %iftmp.1.0, -1
  %t2 = and i32 %not, %d
  %t3 = or i32 %t1, %t2
  ret i32 %t3
}

; In the following tests, verify that a bitcast doesn't get in the way
; of a select transform. These bitcasts are common in SSE/AVX and possibly
; other vector code because of canonicalization to i64 elements for vectors.

; Vector form with bitcasts in the way: the <4 x i32> mask (sext of %cmp) and
; its xor-with-all-ones complement are each bitcast to <2 x i64> before the
; 'and' operations. The expected output selects on %cmp in the <4 x i32>
; domain and bitcasts the result back to <2 x i64>.
define <2 x i64> @bitcast_select(<4 x i1> %cmp, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @bitcast_select(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %a to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> %b to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[OR]]
;
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %t2 = bitcast <4 x i32> %sext to <2 x i64>
  %and = and <2 x i64> %t2, %a
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %neg2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %neg2, %b
  %or = or <2 x i64> %and, %and2
  ret <2 x i64> %or
}

; Variant of @bitcast_select with the operands of the final 'or' commuted
; (%and2, %and). The expected output is the same select-plus-bitcast sequence.
define <2 x i64> @bitcast_select_swap_or_ops(<4 x i1> %cmp, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @bitcast_select_swap_or_ops(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %a to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> %b to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[OR]]
;
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %t2 = bitcast <4 x i32> %sext to <2 x i64>
  %and = and <2 x i64> %t2, %a
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %neg2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %neg2, %b
  %or = or <2 x i64> %and2, %and
  ret <2 x i64> %or
}

; Variant of @bitcast_select with the operands of the second 'and' commuted
; (%b, %neg2). The expected output is the same select-plus-bitcast sequence.
define <2 x i64> @bitcast_select_swap_and_ops(<4 x i1> %cmp, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @bitcast_select_swap_and_ops(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %a to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> %b to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[OR]]
;
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %t2 = bitcast <4 x i32> %sext to <2 x i64>
  %and = and <2 x i64> %t2, %a
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %neg2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %b, %neg2
  %or = or <2 x i64> %and, %and2
  ret <2 x i64> %or
}

; Variant of @bitcast_select with the operands of the first 'and' commuted
; (%a, %t2). The expected output is the same select-plus-bitcast sequence.
define <2 x i64> @bitcast_select_swap_and_ops2(<4 x i1> %cmp, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @bitcast_select_swap_and_ops2(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %a to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> %b to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[OR]]
;
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %t2 = bitcast <4 x i32> %sext to <2 x i64>
  %and = and <2 x i64> %a, %t2
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %neg2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %neg2, %b
  %or = or <2 x i64> %and, %and2
  ret <2 x i64> %or
}