Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
Alexey Bataev 2e972ea056 [SLP]Integrate looking through shuffles logic into ShuffleInstructionBuilder.
Added BaseShuffleAnalysis as a base class for ShuffleInstructionBuilder
and integrated shuffle logic from shuffles for externally used scalars
into this class. This class is used as the main container that
implements smart shuffle instruction builder logic.
ShuffleInstructionBuilder uses this logic.
ShuffleInstructionBuilder is also used in building of the shuffle for
the externally used scalars instead of lambdas, which are now part of BaseShuffleAnalysis class.

Differential Revision: https://reviews.llvm.org/D140100
2022-12-21 06:12:53 -08:00

69 lines
3.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
;RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-android23 < %s | FileCheck %s
; @test: builds a <2 x float> chain in which two insertelement instructions
; (%11 and %13) both write lane 0, so the second insert overwrites the first.
; None of the built vectors is stored: both stores write zeroinitializer.
; The expected output below combines the four scalar loads into two
; <2 x float> loads and the two scalar fsubs into one vector fsub, extracts
; both lanes for the scalar fcmp, and forms the replacement for %13 as a
; single-source shuffle that moves element 1 of the upper load (the value of
; %6) into lane 0.
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr undef, i32 2
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr undef, align 4
; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x float> [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4
; CHECK-NEXT: ret void
;
; Load float elements 2 and 0 (relative to the undef base pointer) and
; subtract them.
%1 = getelementptr inbounds float, ptr undef, i32 2
%2 = load float, ptr %1, align 4
%3 = load float, ptr undef, align 4
%4 = fsub float %2, %3
; Load float elements 3 and 1 and subtract them.
%5 = getelementptr inbounds float, ptr undef, i32 3
%6 = load float, ptr %5, align 4
%7 = getelementptr inbounds float, ptr undef, i32 1
%8 = load float, ptr %7, align 4
%9 = fsub float %6, %8
; Scalar compare of the two differences; the result has no users.
%10 = fcmp olt float %9, %4
; Start the vector build: %3 goes into lane 0.
%11 = insertelement <2 x float> undef, float %3, i64 0
; Constant-only insert; the result has no users.
%12 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
store <2 x float> zeroinitializer, ptr null, align 4
; Second insert into lane 0 of the same chain: replaces %3 with %6.
; The result has no users.
%13 = insertelement <2 x float> %11, float %6, i64 0
store <2 x float> zeroinitializer, ptr null, align 4
ret void
}
; @test1: two different <2 x float> vectors are built on top of the same
; lane-0 insert by inserting a different scalar into lane 1 each time
; (%8, then %6); each resulting vector is stored to null.
; The expected output below stores the vector load of elements 0-1 directly
; for the first vector (%3, %8), and builds the second vector (%3, %6) with a
; two-source shuffle using mask <0, 3>: lane 0 from the lower load, lane 1
; from element 1 of the upper load.
define void @test1() {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr undef, i32 2
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr undef, align 4
; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x float> [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]]
; CHECK-NEXT: store <2 x float> [[TMP3]], ptr null, align 4
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: store <2 x float> [[TMP8]], ptr null, align 4
; CHECK-NEXT: ret void
;
; Load float elements 2 and 0 (relative to the undef base pointer) and
; subtract them.
%1 = getelementptr inbounds float, ptr undef, i32 2
%2 = load float, ptr %1, align 4
%3 = load float, ptr undef, align 4
%4 = fsub float %2, %3
; Load float elements 3 and 1 and subtract them.
%5 = getelementptr inbounds float, ptr undef, i32 3
%6 = load float, ptr %5, align 4
%7 = getelementptr inbounds float, ptr undef, i32 1
%8 = load float, ptr %7, align 4
%9 = fsub float %6, %8
; Scalar compare of the two differences; the result has no users.
%10 = fcmp olt float %9, %4
; Shared first step of both vector builds: %3 goes into lane 0.
%.sroa.0.0.vec.insert.i5.i10 = insertelement <2 x float> undef, float %3, i64 0
; First vector: lane 1 = %8 (element 1), giving (%3, %8).
%.sroa.0.4.vec.insert.i10.i13 = insertelement <2 x float> %.sroa.0.0.vec.insert.i5.i10, float %8, i64 1
store <2 x float> %.sroa.0.4.vec.insert.i10.i13, ptr null, align 4
; Second vector: same lane-0 base, lane 1 = %6 (element 3), giving (%3, %6).
%.sroa.0.4.vec.insert.i10.i13.2 = insertelement <2 x float> %.sroa.0.0.vec.insert.i5.i10, float %6, i64 1
store <2 x float> %.sroa.0.4.vec.insert.i10.i13.2, ptr null, align 4
ret void
}