(Cond & C) | (~bitcast(Cond) & D) --> bitcast (select Cond, (bc C), (bc D))
This is part of fixing https://llvm.org/PR34047. That report shows a case where a bitcast sits between the select condition candidate and its 'not' value because of the current cast canonicalization rules.
There is a bitcast type restriction that might be violated in the existing matching, but I still need to investigate whether that is possible. Alive2 shows that this transform is only safe when the bitcast goes from narrow to wide vector elements (otherwise poison could leak into elements that were safe in the original code): https://alive2.llvm.org/ce/z/Hf66qh
Differential Revision: https://reviews.llvm.org/D113035
94 lines
3.7 KiB
LLVM
94 lines
3.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -O2 -S -mattr=sse < %s | FileCheck %s
|
|
|
|
; This file should represent the nearly raw (mem2reg was run to make it more direct)
|
|
; IR for code written using x86 SSE intrinsics to compute integer abs/max functions.
|
|
;
|
|
; https://llvm.org/PR34047
|
|
|
|
; Bitwise select driven by a signed per-i32-lane compare:
;   mask   = _mm_cmpgt_epi32(a, b)
;   result = (~mask & c) | (mask & d)
; The mask is produced and consumed as <2 x i64>, so the i32-lane compare
; result reaches the and/andnot/or logic through bitcasts in the helpers.
define available_externally <2 x i64> @cmpgt_i32_sel_m128i(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
  %mask = call <2 x i64> @_mm_cmpgt_epi32(<2 x i64> %a, <2 x i64> %b)
  ; Bits of %c where the mask is clear.
  %from_c = call <2 x i64> @_mm_andnot_si128(<2 x i64> %mask, <2 x i64> %c)
  ; Bits of %d where the mask is set.
  %from_d = call <2 x i64> @_mm_and_si128(<2 x i64> %mask, <2 x i64> %d)
  %blend = call <2 x i64> @_mm_or_si128(<2 x i64> %from_c, <2 x i64> %from_d)
  ret <2 x i64> %blend
}
|
|
|
|
; Broadcast: forwards %__i as all four arguments of @_mm_set_epi32 and
; returns the resulting <2 x i64> unchanged.
define internal <2 x i64> @_mm_set1_epi32(i32 %__i) {
  %splat = call <2 x i64> @_mm_set_epi32(i32 %__i, i32 %__i, i32 %__i, i32 %__i)
  ret <2 x i64> %splat
}
|
|
|
|
; Lane-wise i32 subtraction (%__a - %__b) on operands passed as <2 x i64>:
; both inputs are reinterpreted as <4 x i32>, subtracted, and the difference
; is reinterpreted back to <2 x i64>.
define internal <2 x i64> @_mm_sub_epi32(<2 x i64> %__a, <2 x i64> %__b) {
  %lhs = bitcast <2 x i64> %__a to <4 x i32>
  %rhs = bitcast <2 x i64> %__b to <4 x i32>
  %diff = sub <4 x i32> %lhs, %rhs
  %res = bitcast <4 x i32> %diff to <2 x i64>
  ret <2 x i64> %res
}
|
|
|
|
; Returns the all-zero <2 x i64> vector.
define internal <2 x i64> @_mm_setzero_si128() {
  ret <2 x i64> <i64 0, i64 0>
}
|
|
|
|
; Signed greater-than compare on four i32 lanes, with operands and result
; carried as <2 x i64>. Each i1 compare result is sign-extended to i32, so a
; lane is all-ones where %__a > %__b and zero otherwise.
define internal <2 x i64> @_mm_cmpgt_epi32(<2 x i64> %__a, <2 x i64> %__b) {
  %lhs = bitcast <2 x i64> %__a to <4 x i32>
  %rhs = bitcast <2 x i64> %__b to <4 x i32>
  %gt = icmp sgt <4 x i32> %lhs, %rhs
  %lanemask = sext <4 x i1> %gt to <4 x i32>
  %res = bitcast <4 x i32> %lanemask to <2 x i64>
  ret <2 x i64> %res
}
|
|
|
|
; Bitwise OR of the two <2 x i64> operands.
define internal <2 x i64> @_mm_or_si128(<2 x i64> %__a, <2 x i64> %__b) {
  %bits = or <2 x i64> %__a, %__b
  ret <2 x i64> %bits
}
|
|
|
|
; Computes (~%__a) & %__b. The complement is written as an xor with all-ones.
define internal <2 x i64> @_mm_andnot_si128(<2 x i64> %__a, <2 x i64> %__b) {
  %inverted = xor <2 x i64> %__a, <i64 -1, i64 -1>
  %masked = and <2 x i64> %inverted, %__b
  ret <2 x i64> %masked
}
|
|
|
|
; Bitwise AND of the two <2 x i64> operands.
define internal <2 x i64> @_mm_and_si128(<2 x i64> %__a, <2 x i64> %__b) {
  %bits = and <2 x i64> %__a, %__b
  ret <2 x i64> %bits
}
|
|
|
|
; Builds a <4 x i32> from four scalars and returns it reinterpreted as
; <2 x i64>. The arguments are listed from the highest lane to the lowest:
; %__i0 lands in element 0 and %__i3 in element 3.
define internal <2 x i64> @_mm_set_epi32(i32 %__i3, i32 %__i2, i32 %__i1, i32 %__i0) {
  ; The chain starts from a poison placeholder rather than the deprecated
  ; undef. All four lanes are overwritten below, so the placeholder value
  ; never reaches the result.
  %vecinit = insertelement <4 x i32> poison, i32 %__i0, i32 0
  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %__i1, i32 1
  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %__i2, i32 2
  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %__i3, i32 3
  %t0 = bitcast <4 x i32> %vecinit3 to <2 x i64>
  ret <2 x i64> %t0
}
|
|
|
|
define <2 x i64> @abs_v4i32(<2 x i64> %x) {
; CHECK-LABEL: @abs_v4i32(
; CHECK-NEXT:    [[T1_I:%.*]] = bitcast <2 x i64> [[X:%.*]] to <4 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[T1_I]], i1 false)
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  ; Per i32 lane: pick x where x > -1 (signed), otherwise pick 0 - x.
  ; The CHECK lines above expect the whole sequence to become one llvm.abs call.
  %neg_one = call <2 x i64> @_mm_set1_epi32(i32 -1)
  %zero = call <2 x i64> @_mm_setzero_si128()
  %negated = call <2 x i64> @_mm_sub_epi32(<2 x i64> %zero, <2 x i64> %x)
  %abs = call <2 x i64> @cmpgt_i32_sel_m128i(<2 x i64> %x, <2 x i64> %neg_one, <2 x i64> %negated, <2 x i64> %x)
  ret <2 x i64> %abs
}
|
|
|
|
define <2 x i64> @max_v4i32(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @max_v4i32(
; CHECK-NEXT:    [[T0_I_I:%.*]] = bitcast <2 x i64> [[X:%.*]] to <4 x i32>
; CHECK-NEXT:    [[T1_I_I:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x i32>
; CHECK-NEXT:    [[CMP_I_I:%.*]] = icmp sgt <4 x i32> [[T0_I_I]], [[T1_I_I]]
; CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x i32> [[T0_I_I]], <4 x i32> [[T1_I_I]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  ; Per i32 lane: pick x where x > y (signed), otherwise pick y.
  ; The CHECK lines above expect the and/andnot/or logic to become a select.
  %max = call <2 x i64> @cmpgt_i32_sel_m128i(<2 x i64> %x, <2 x i64> %y, <2 x i64> %y, <2 x i64> %x)
  ret <2 x i64> %max
}
|