Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
Alexey Bataev 40105a9933 [SLP]Find reused scalars in buildvector sequences, if any.
Patch generalizes analysis of scalars. The main part is outlined into
a lambda, which can be used to find reused inserted scalars and emit
a shuffle for them instead of multiple insertelement instructions, if the
permutation is found already. I.e. some scalars are transformed by the
permutation of previously vectorized nodes, and some are inserted
directly.

Reworked part of D110978

Differential Revision: https://reviews.llvm.org/D146564
2023-04-05 09:37:05 -07:00

37 lines
1.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+sse4.2 | FileCheck %s
; Two zero-initialized 4 x i32 arrays. In @fn1 below, every load reads from @b
; and every store writes to @a.
@a = common local_unnamed_addr global [4 x i32] zeroinitializer, align 4
@b = common local_unnamed_addr global [4 x i32] zeroinitializer, align 4
; @fn1 reads the four elements of @b in order (indices 0,1,2,3), compares each
; against zero, and stores a per-element select result into @a in a permuted
; ("jumbled") order:
;   a[3] = b[0] > 0 ? 8                 : 0
;   a[0] = b[1] > 0 ? b[1]              : 6
;   a[1] = b[2] > 0 ? ptrtoint(@fn1)    : 0
;   a[2] = b[3] > 0 ? ptrtoint(@fn1)    : 0
; The load of b[1] has two users (the compare and the select), and the
; ptrtoint constant is used as the true-value of two different selects.
;
; The assertions below expect a fully vectorized form: one <4 x i32> load of
; @b, one vector compare, a shufflevector that builds the select true-operand
; (mask <4,1,6,6>: lane 0 takes the constant 8, lane 1 takes the loaded b[1],
; lanes 2 and 3 both reuse the single ptrtoint element), a vector select
; against <0,6,0,0>, and a final shufflevector with mask <1,2,3,0> that
; rotates the lanes into the store order before one <4 x i32> store to @a.
define i32 @fn1() {
; CHECK-LABEL: @fn1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @b, align 4
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[TMP0]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> <i32 8, i32 poison, i32 ptrtoint (ptr @fn1 to i32), i32 poison>, <4 x i32> <i32 4, i32 1, i32 6, i32 6>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 6, i32 0, i32 0>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @a, align 4
; CHECK-NEXT: ret i32 0
;
entry:
; Element 0 of @b: result goes to a[3].
%0 = load i32, ptr @b, align 4
%cmp = icmp sgt i32 %0, 0
%cond = select i1 %cmp, i32 8, i32 0
store i32 %cond, ptr getelementptr inbounds ([4 x i32], ptr @a, i64 0, i32 3), align 4
; Element 1 of @b: the loaded value %1 feeds both the compare and the select;
; result goes to a[0].
%1 = load i32, ptr getelementptr ([4 x i32], ptr @b, i64 0, i32 1), align 4
%cmp1 = icmp sgt i32 %1, 0
%. = select i1 %cmp1, i32 %1, i32 6
store i32 %., ptr @a, align 4
; Element 2 of @b: result goes to a[1].
%2 = load i32, ptr getelementptr ([4 x i32], ptr @b, i64 0, i32 2), align 4
%cmp4 = icmp sgt i32 %2, 0
%3 = select i1 %cmp4, i32 ptrtoint (ptr @fn1 to i32), i32 0
store i32 %3, ptr getelementptr inbounds ([4 x i32], ptr @a, i64 0, i32 2), align 4
; Element 3 of @b: same ptrtoint true-value as the previous select; result
; goes to a[2].
%4 = load i32, ptr getelementptr ([4 x i32], ptr @b, i64 0, i32 3), align 4
%cmp6 = icmp sgt i32 %4, 0
%5 = select i1 %cmp6, i32 ptrtoint (ptr @fn1 to i32), i32 0
store i32 %5, ptr getelementptr inbounds ([4 x i32], ptr @a, i64 0, i32 2), align 4
ret i32 0
}