Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll
Alexey Bataev bd05376986 [SLP]Improve multinode analysis.
Changes the preliminary multinode analysis:
1. Introduced scores for reversed loads/extractelements.
2. Improved shallow score calculation.
3. Lowered the cost of external uses (no need to consider it several times, just ones).
4. The initial lane for analysis is the one with the minimal possible
   reorderings.

These changes in general shall reduce compile time and improve the
reordering in many cases.

Part of D57059.

Differential Revision: https://reviews.llvm.org/D101109
2021-12-14 06:01:52 -08:00

112 lines
5.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-unknown-linux -march=core-avx2 | FileCheck %s
; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-unknown-linux -march=core-avx2 -slp-threshold=-1 -slp-vectorize-hor-store | FileCheck %s --check-prefix=THRESH1
; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-unknown-linux -march=core-avx2 -slp-threshold=-2 -slp-vectorize-hor-store | FileCheck %s --check-prefix=THRESH2
@a = global float 0.000000e+00, align 4
define float @f(<2 x float> %x) {
; CHECK-LABEL: @f(
; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[X]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret float [[ADD]]
;
; THRESH1-LABEL: @f(
; THRESH1-NEXT: [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[X]]
; THRESH1-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
; THRESH1-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP2]], [[TMP3]]
; THRESH1-NEXT: ret float [[ADD]]
;
; THRESH2-LABEL: @f(
; THRESH2-NEXT: [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[X]]
; THRESH2-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP2]], [[TMP3]]
; THRESH2-NEXT: ret float [[ADD]]
;
%x0 = extractelement <2 x float> %x, i32 0
%x1 = extractelement <2 x float> %x, i32 1
%x0x0 = fmul float %x0, %x0
%x1x1 = fmul float %x1, %x1
%add = fadd float %x0x0, %x1x1
ret float %add
}
define float @f_used_out_of_tree(<2 x float> %x) {
; CHECK-LABEL: @f_used_out_of_tree(
; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1
; CHECK-NEXT: [[X0X0:%.*]] = fmul float [[X0]], [[X0]]
; CHECK-NEXT: [[X1X1:%.*]] = fmul float [[X1]], [[X1]]
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]]
; CHECK-NEXT: store float [[ADD]], float* @a, align 4
; CHECK-NEXT: ret float [[X0]]
;
; THRESH1-LABEL: @f_used_out_of_tree(
; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; THRESH1-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X]], [[X]]
; THRESH1-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
; THRESH1-NEXT: store float [[ADD]], float* @a, align 4
; THRESH1-NEXT: ret float [[TMP1]]
;
; THRESH2-LABEL: @f_used_out_of_tree(
; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; THRESH2-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X]], [[X]]
; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
; THRESH2-NEXT: store float [[ADD]], float* @a, align 4
; THRESH2-NEXT: ret float [[TMP1]]
;
%x0 = extractelement <2 x float> %x, i32 0
%x1 = extractelement <2 x float> %x, i32 1
%x0x0 = fmul float %x0, %x0
%x1x1 = fmul float %x1, %x1
%add = fadd float %x0x0, %x1x1
store float %add, float* @a
ret float %x0
}
define float @f_used_twice_in_tree(<2 x float> %x) {
; CHECK-LABEL: @f_used_twice_in_tree(
; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1
; CHECK-NEXT: [[X0X0:%.*]] = fmul float [[X0]], [[X1]]
; CHECK-NEXT: [[X1X1:%.*]] = fmul float [[X1]], [[X1]]
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]]
; CHECK-NEXT: ret float [[ADD]]
;
; THRESH1-LABEL: @f_used_twice_in_tree(
; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
; THRESH1-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1
; THRESH1-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]]
; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
; THRESH1-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]]
; THRESH1-NEXT: ret float [[ADD]]
;
; THRESH2-LABEL: @f_used_twice_in_tree(
; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
; THRESH2-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
; THRESH2-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1
; THRESH2-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]]
; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
; THRESH2-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]]
; THRESH2-NEXT: ret float [[ADD]]
;
%x0 = extractelement <2 x float> %x, i32 0
%x1 = extractelement <2 x float> %x, i32 1
%x0x0 = fmul float %x0, %x1
%x1x1 = fmul float %x1, %x1
%add = fadd float %x0x0, %x1x1
ret float %add
}