Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll
Alexey Bataev b10ecfa914 [SLP]Represent externally used values as original scalars, if profitable.
Currently, the SLP vectorizer tries to keep only GEPs as scalars if they are
vectorized but used externally. The same approach can be used for all scalar
values. This patch tries to keep the original scalar if all of its operands
remain scalar or are externally used, the cost of the original scalar is
lower than the cost of the extractelement instruction, or if the number
of externally used scalars in the same entry is a power of 2. The last
criterion allows better revectorization of scalars that have multiple uses.

Reviewers: RKSimon

Reviewed By: RKSimon

Pull Request: https://github.com/llvm/llvm-project/pull/100904
2024-08-12 10:15:02 -04:00

129 lines
5.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; Baseline case: four consecutive i32 elements of A are each multiplied by the
; same scalar and stored to the matching element of B. None of the loaded
; values is used outside its own load/mul/store chain, and the function
; returns the constant 0. C equivalent:
; int foo(int * restrict B, int * restrict A, int n, int m) {
; B[0] = n * A[0] + m * A[0];
; B[1] = n * A[1] + m * A[1];
; B[2] = n * A[2] + m * A[2];
; B[3] = n * A[3] + m * A[3];
; return 0;
; }
; The IR below uses the factored form A[i] * (m + n): %mul238 is computed once
; and is an operand of all four multiplies.
; NOTE(review): #0 names an attribute group whose definition is not visible in
; this excerpt - confirm it exists (or is intentionally absent) in the full file.
define i32 @foo(ptr noalias nocapture %B, ptr noalias nocapture %A, i32 %n, i32 %m) #0 {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]]
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP0]], [[TMP2]]
; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[B:%.*]], align 4
; CHECK-NEXT: ret i32 0
;
; Expected output per the assertions above: the scalar add stays scalar and is
; broadcast to all four lanes (insertelement + zero-mask shufflevector), and
; the four load/mul/store chains become one <4 x i32> load, mul and store.
entry:
; Lane 0: B[0] = A[0] * (m + n). Note that the %add* values hold products
; (mul), despite their names.
%0 = load i32, ptr %A, align 4
%mul238 = add i32 %m, %n
%add = mul i32 %0, %mul238
store i32 %add, ptr %B, align 4
; Lane 1: B[1] = A[1] * (m + n).
%arrayidx4 = getelementptr inbounds i32, ptr %A, i64 1
%1 = load i32, ptr %arrayidx4, align 4
%add8 = mul i32 %1, %mul238
%arrayidx9 = getelementptr inbounds i32, ptr %B, i64 1
store i32 %add8, ptr %arrayidx9, align 4
; Lane 2: B[2] = A[2] * (m + n).
%arrayidx10 = getelementptr inbounds i32, ptr %A, i64 2
%2 = load i32, ptr %arrayidx10, align 4
%add14 = mul i32 %2, %mul238
%arrayidx15 = getelementptr inbounds i32, ptr %B, i64 2
store i32 %add14, ptr %arrayidx15, align 4
; Lane 3: B[3] = A[3] * (m + n).
%arrayidx16 = getelementptr inbounds i32, ptr %A, i64 3
%3 = load i32, ptr %arrayidx16, align 4
%add20 = mul i32 %3, %mul238
%arrayidx21 = getelementptr inbounds i32, ptr %B, i64 3
store i32 %add20, ptr %arrayidx21, align 4
ret i32 0
}
; Same load/mul/store pattern as a four-lane multiply by (m + n), but the
; lane-0 load (%0) is also the function's return value, so it has a user
; outside the vectorizable chain. C equivalent:
; int extr_user(int * restrict B, int * restrict A, int n, int m) {
; B[0] = n * A[0] + m * A[0];
; B[1] = n * A[1] + m * A[1];
; B[2] = n * A[2] + m * A[2];
; B[3] = n * A[3] + m * A[3];
; return A[0];
; }
; The IR below uses the factored form A[i] * (m + n), with %mul238 shared by
; all four multiplies.
define i32 @extr_user(ptr noalias nocapture %B, ptr noalias nocapture %A, i32 %n, i32 %m) {
; CHECK-LABEL: @extr_user(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]]
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP0]], [[TMP3]]
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[B:%.*]], align 4
; CHECK-NEXT: ret i32 [[TMP1]]
;
; Expected output per the assertions above: the chain is vectorized to
; <4 x i32>, and the returned value comes from a separate scalar i32 load of A
; that sits next to the vector load; no extractelement is expected.
entry:
; Lane 0: B[0] = A[0] * (m + n). %0 is used by the mul here and by the ret.
%0 = load i32, ptr %A, align 4
%mul238 = add i32 %m, %n
%add = mul i32 %0, %mul238
store i32 %add, ptr %B, align 4
; Lane 1: B[1] = A[1] * (m + n).
%arrayidx4 = getelementptr inbounds i32, ptr %A, i64 1
%1 = load i32, ptr %arrayidx4, align 4
%add8 = mul i32 %1, %mul238
%arrayidx9 = getelementptr inbounds i32, ptr %B, i64 1
store i32 %add8, ptr %arrayidx9, align 4
; Lane 2: B[2] = A[2] * (m + n).
%arrayidx10 = getelementptr inbounds i32, ptr %A, i64 2
%2 = load i32, ptr %arrayidx10, align 4
%add14 = mul i32 %2, %mul238
%arrayidx15 = getelementptr inbounds i32, ptr %B, i64 2
store i32 %add14, ptr %arrayidx15, align 4
; Lane 3: B[3] = A[3] * (m + n).
%arrayidx16 = getelementptr inbounds i32, ptr %A, i64 3
%3 = load i32, ptr %arrayidx16, align 4
%add20 = mul i32 %3, %mul238
%arrayidx21 = getelementptr inbounds i32, ptr %B, i64 3
store i32 %add20, ptr %arrayidx21, align 4
ret i32 %0 ;<--------- This value has multiple users
}
; In this example we have an external user that is not the first element in the vector.
; The load/mul/store pattern is the same four-lane multiply by (m + n); here the
; returned value is %1, the load of A[1] (lane 1), which is also an operand of
; the lane-1 multiply.
define i32 @extr_user1(ptr noalias nocapture %B, ptr noalias nocapture %A, i32 %n, i32 %m) {
; CHECK-LABEL: @extr_user1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]]
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP0]], [[TMP3]]
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[B:%.*]], align 4
; CHECK-NEXT: ret i32 [[TMP1]]
;
; Expected output per the assertions above: the chain is vectorized to
; <4 x i32>, while the GEP to A + 1 and a scalar i32 load through it are kept
; to produce the return value; no extractelement is expected.
entry:
; Lane 0: B[0] = A[0] * (m + n).
%0 = load i32, ptr %A, align 4
%mul238 = add i32 %m, %n
%add = mul i32 %0, %mul238
store i32 %add, ptr %B, align 4
; Lane 1: B[1] = A[1] * (m + n). %1 is used by the mul here and by the ret.
%arrayidx4 = getelementptr inbounds i32, ptr %A, i64 1
%1 = load i32, ptr %arrayidx4, align 4
%add8 = mul i32 %1, %mul238
%arrayidx9 = getelementptr inbounds i32, ptr %B, i64 1
store i32 %add8, ptr %arrayidx9, align 4
; Lane 2: B[2] = A[2] * (m + n).
%arrayidx10 = getelementptr inbounds i32, ptr %A, i64 2
%2 = load i32, ptr %arrayidx10, align 4
%add14 = mul i32 %2, %mul238
%arrayidx15 = getelementptr inbounds i32, ptr %B, i64 2
store i32 %add14, ptr %arrayidx15, align 4
; Lane 3: B[3] = A[3] * (m + n).
%arrayidx16 = getelementptr inbounds i32, ptr %A, i64 3
%3 = load i32, ptr %arrayidx16, align 4
%add20 = mul i32 %3, %mul238
%arrayidx21 = getelementptr inbounds i32, ptr %B, i64 3
store i32 %add20, ptr %arrayidx21, align 4
ret i32 %1 ;<--------- This value has multiple users
}