Files
clang-p2996/llvm/test/Transforms/PhaseOrdering/X86/vector-math.ll
Sanjay Patel c36b7e21bd [InstCombine] enhance vector bitwise select matching
(Cond & C) | (~bitcast(Cond) & D) --> bitcast (select Cond, (bc C), (bc D))

This is part of fixing:
https://llvm.org/PR34047

That report shows a case where a bitcast is sitting between the select condition
candidate and its 'not' value due to current cast canonicalization rules.

There's a bitcast type restriction that might be violated in existing matching,
but I still need to investigate whether that is possible.
Alive2 shows we can only do this transform safely when the bitcast is from
narrow to wide vector elements (otherwise poison could leak into elements
that were safe in the original code):
https://alive2.llvm.org/ce/z/Hf66qh

Differential Revision: https://reviews.llvm.org/D113035
2021-11-09 08:54:59 -05:00

94 lines
3.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -O2 -S -mattr=sse < %s | FileCheck %s
; This file should represent the nearly raw (mem2reg was run to make it more direct)
; IR for code written using x86 SSE intrinsics to compute integer abs/max functions.
;
; https://llvm.org/PR34047
; Bitwise select built from SSE-style helpers:
;   mask   = (%a > %b), computed per signed i32 lane by @_mm_cmpgt_epi32
;   result = (~mask & %c) | (mask & %d)
; so each lane takes its bits from %d where the compare holds and from %c otherwise.
define available_externally <2 x i64> @cmpgt_i32_sel_m128i(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
  %mask = call <2 x i64> @_mm_cmpgt_epi32(<2 x i64> %a, <2 x i64> %b)
  ; Bits of %c kept where the mask is clear.
  %from.c = call <2 x i64> @_mm_andnot_si128(<2 x i64> %mask, <2 x i64> %c)
  ; Bits of %d kept where the mask is set.
  %from.d = call <2 x i64> @_mm_and_si128(<2 x i64> %mask, <2 x i64> %d)
  %blend = call <2 x i64> @_mm_or_si128(<2 x i64> %from.c, <2 x i64> %from.d)
  ret <2 x i64> %blend
}
; Broadcast %__i into all four i32 lanes by forwarding it as every argument of
; @_mm_set_epi32; the vector is returned as <2 x i64>.
define internal <2 x i64> @_mm_set1_epi32(i32 %__i) {
  %splat = call <2 x i64> @_mm_set_epi32(i32 %__i, i32 %__i, i32 %__i, i32 %__i)
  ret <2 x i64> %splat
}
; Lane-wise i32 subtraction (%__a - %__b): both <2 x i64> operands are
; reinterpreted as <4 x i32>, subtracted, and the difference is cast back.
define internal <2 x i64> @_mm_sub_epi32(<2 x i64> %__a, <2 x i64> %__b) {
  %lhs = bitcast <2 x i64> %__a to <4 x i32>
  %rhs = bitcast <2 x i64> %__b to <4 x i32>
  %diff = sub <4 x i32> %lhs, %rhs
  %res = bitcast <4 x i32> %diff to <2 x i64>
  ret <2 x i64> %res
}
; Return a 128-bit vector with every bit cleared.
define internal <2 x i64> @_mm_setzero_si128() {
  ret <2 x i64> <i64 0, i64 0>
}
; Signed greater-than on four i32 lanes (%__a > %__b).
; The <4 x i1> compare result is sign-extended, so a true lane becomes all-ones
; and a false lane all-zeros; the mask is returned reinterpreted as <2 x i64>.
define internal <2 x i64> @_mm_cmpgt_epi32(<2 x i64> %__a, <2 x i64> %__b) {
  %lhs = bitcast <2 x i64> %__a to <4 x i32>
  %rhs = bitcast <2 x i64> %__b to <4 x i32>
  %gt = icmp sgt <4 x i32> %lhs, %rhs
  %mask = sext <4 x i1> %gt to <4 x i32>
  %res = bitcast <4 x i32> %mask to <2 x i64>
  ret <2 x i64> %res
}
; Bitwise OR of the two 128-bit vector operands.
define internal <2 x i64> @_mm_or_si128(<2 x i64> %__a, <2 x i64> %__b) {
  %bits = or <2 x i64> %__a, %__b
  ret <2 x i64> %bits
}
; Compute (~%__a) & %__b. Note that the FIRST operand is the one inverted.
define internal <2 x i64> @_mm_andnot_si128(<2 x i64> %__a, <2 x i64> %__b) {
  ; xor with all-ones flips every bit of %__a.
  %inverted = xor <2 x i64> %__a, <i64 -1, i64 -1>
  %masked = and <2 x i64> %inverted, %__b
  ret <2 x i64> %masked
}
; Bitwise AND of the two 128-bit vector operands.
define internal <2 x i64> @_mm_and_si128(<2 x i64> %__a, <2 x i64> %__b) {
  %bits = and <2 x i64> %__a, %__b
  ret <2 x i64> %bits
}
; Build a <4 x i32> vector from four scalars and return it as <2 x i64>.
; Arguments are listed from the highest lane to the lowest:
;   %__i0 -> lane 0, %__i1 -> lane 1, %__i2 -> lane 2, %__i3 -> lane 3.
define internal <2 x i64> @_mm_set_epi32(i32 %__i3, i32 %__i2, i32 %__i1, i32 %__i0) {
  ; The chain starts from a poison placeholder rather than the deprecated undef.
  ; Every lane is overwritten below, so no placeholder element reaches the result.
  %vecinit = insertelement <4 x i32> poison, i32 %__i0, i32 0
  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %__i1, i32 1
  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %__i2, i32 2
  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %__i3, i32 3
  %t0 = bitcast <4 x i32> %vecinit3 to <2 x i64>
  ret <2 x i64> %t0
}
; Integer absolute value over four i32 lanes, written with the helper
; "intrinsics" in this file: each lane is x when x > -1 and (0 - x) otherwise.
; The expected output below is a single call to the llvm.abs intrinsic on the
; <4 x i32> view of the argument.
define <2 x i64> @abs_v4i32(<2 x i64> %x) {
; CHECK-LABEL: @abs_v4i32(
; CHECK-NEXT: [[T1_I:%.*]] = bitcast <2 x i64> [[X:%.*]] to <4 x i32>
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[T1_I]], i1 false)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
  ; Splat of -1: the threshold for the signed compare.
  %minus.one = call <2 x i64> @_mm_set1_epi32(i32 -1)
  %zero = call <2 x i64> @_mm_setzero_si128()
  ; 0 - x, i.e. the negated input.
  %negated = call <2 x i64> @_mm_sub_epi32(<2 x i64> %zero, <2 x i64> %x)
  ; Select x where x > -1, otherwise the negated value.
  %abs = call <2 x i64> @cmpgt_i32_sel_m128i(<2 x i64> %x, <2 x i64> %minus.one, <2 x i64> %negated, <2 x i64> %x)
  ret <2 x i64> %abs
}
; Signed maximum over four i32 lanes, written with the helper "intrinsics" in
; this file: each lane is x when x > y and y otherwise.
; The expected output below is one icmp sgt feeding one vector select.
define <2 x i64> @max_v4i32(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @max_v4i32(
; CHECK-NEXT: [[T0_I_I:%.*]] = bitcast <2 x i64> [[X:%.*]] to <4 x i32>
; CHECK-NEXT: [[T1_I_I:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x i32>
; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp sgt <4 x i32> [[T0_I_I]], [[T1_I_I]]
; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x i32> [[T0_I_I]], <4 x i32> [[T1_I_I]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
  ; Compare x against y; pick y where the compare fails and x where it holds.
  %max = call <2 x i64> @cmpgt_i32_sel_m128i(<2 x i64> %x, <2 x i64> %y, <2 x i64> %y, <2 x i64> %x)
  ret <2 x i64> %max
}