Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll
Alexey Bataev 30edf1c449 [SLP]Do not early exit if the number of unique elements is non-power-of-2. (#65476)
We can still try to vectorize the bundle of instructions, even if the
number of unique instructions (after removing repeats) is non-power-of-2.
In this case we need to adjust the cost (calculate the cost only for the
unique scalar instructions) and the cost of the extracts. Also, when
scheduling the bundle we need to schedule only the unique scalars to avoid
a compiler crash caused by multiple dependencies. This can be safely
applied only if all of the scalars' users are also vectorized and do not
require memory accesses (this is a temporary requirement that can be
relaxed later).

---------

Co-authored-by: Alexey Bataev <a.bataev@outlook.com>
2023-09-08 10:00:46 -04:00

25 lines
1009 B
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=x86_64-- -passes=slp-vectorizer,instcombine -S < %s | FileCheck %s
; These conversions should be vectorized by reviews.llvm.org/D57059
; @foo builds a <4 x float> from lanes 1, 2 and 3 of the <4 x i32> argument:
; each selected lane is converted with a scalar sitofp and inserted into the
; result. The converted lane 1 is inserted twice (result lanes 0 and 1), so
; only three distinct scalar conversions feed the four result lanes; input
; lane 0 is never read.
; The expected output below is one vector sitofp of the whole argument
; followed by a single shufflevector with mask <1, 1, 2, 3>.
define dso_local <4 x float> @foo(<4 x i32> %0) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP0:%.*]] to <4 x float>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP3]]
;
; Input lane 1 -> float, reused for result lanes 0 and 1 (the repeated scalar).
%2 = extractelement <4 x i32> %0, i32 1
%3 = sitofp i32 %2 to float
%4 = insertelement <4 x float> undef, float %3, i32 0
%5 = insertelement <4 x float> %4, float %3, i32 1
; Input lane 2 -> float, placed in result lane 2.
%6 = extractelement <4 x i32> %0, i32 2
%7 = sitofp i32 %6 to float
%8 = insertelement <4 x float> %5, float %7, i32 2
; Input lane 3 -> float, placed in result lane 3.
%9 = extractelement <4 x i32> %0, i32 3
%10 = sitofp i32 %9 to float
%11 = insertelement <4 x float> %8, float %10, i32 3
; All four lanes of %11 are now defined; none of the initial undef remains.
ret <4 x float> %11
}