back into a vector

Previously the cost of the existing ExtractElement/ExtractValue instructions was considered a dead cost only if it was detected that they have only one use. But these instructions may also be considered dead if the users of the instructions are themselves going to be vectorized, like:

```
%x0 = extractelement <2 x float> %x, i32 0
%x1 = extractelement <2 x float> %x, i32 1
%x0x0 = fmul float %x0, %x0
%x1x1 = fmul float %x1, %x1
%add = fadd float %x0x0, %x1x1
```

This can be transformed to

```
%1 = fmul <2 x float> %x, %x
%2 = extractelement <2 x float> %1, i32 0
%3 = extractelement <2 x float> %1, i32 1
%add = fadd float %2, %3
```

because, although `%x0` and `%x1` each have 2 users, these users are part of the vectorized tree, so we can consider these `extractelement` instructions dead.

Differential Revision: https://reviews.llvm.org/D29900

llvm-svn: 295056
62 lines
2.7 KiB
LLVM
62 lines
2.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
;
; The SLP vectorizer is run over this file three times; each run is verified
; with its own FileCheck prefix:
;   1. default SLP threshold                                 -> prefix CHECK
;   2. -slp-threshold=-1 together with -slp-vectorize-hor-store -> prefix THRESH1
;   3. -slp-threshold=-2 together with -slp-vectorize-hor-store -> prefix THRESH2
;
; NOTE(review): core-avx2 looks like a CPU name (normally passed via -mcpu)
; rather than an architecture name -- confirm that -march is what is intended
; here. Any change to these flags requires regenerating the assertions below.
;
; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-unknown-linux -march=core-avx2 | FileCheck %s
; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-unknown-linux -march=core-avx2 -slp-threshold=-1 -slp-vectorize-hor-store | FileCheck %s --check-prefix=THRESH1
; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-unknown-linux -march=core-avx2 -slp-threshold=-2 -slp-vectorize-hor-store | FileCheck %s --check-prefix=THRESH2

; Scalar float global; it is the destination of the store in @f_used_out_of_tree.
@a = global float 0.000000e+00, align 4

; Baseline case: both lanes of %x are extracted, each lane is squared, and the
; two squares are added. Every use of %x0 and %x1 is an operand of one of the
; two fmuls. The assertions (default prefix) expect the two scalar fmuls to be
; replaced by a single <2 x float> fmul of %x with itself, followed by two
; extractelements feeding the scalar fadd.
define float @f(<2 x float> %x) {
; CHECK-LABEL: @f(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[X]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret float [[ADD]]
;
  %x0 = extractelement <2 x float> %x, i32 0
  %x1 = extractelement <2 x float> %x, i32 1
  %x0x0 = fmul float %x0, %x0
  %x1x1 = fmul float %x1, %x1
  %add = fadd float %x0x0, %x1x1
  ret float %add
}

; Same multiply/add pattern as @f, but the sum is stored to @a and the function
; returns %x0 directly, so the lane-0 extract also has a user (the ret) that is
; not one of the fmuls. Only the THRESH2 run has assertions for this function:
; they expect the lane-0 extractelement of %x to be kept for the return value
; while the two fmuls are still merged into one <2 x float> fmul.
define float @f_used_out_of_tree(<2 x float> %x) {
; THRESH2-LABEL: @f_used_out_of_tree(
; THRESH2-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; THRESH2-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[X]], [[X]]
; THRESH2-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESH2-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESH2-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
; THRESH2-NEXT:    store float [[ADD]], float* @a
; THRESH2-NEXT:    ret float [[TMP1]]
;
  %x0 = extractelement <2 x float> %x, i32 0
  %x1 = extractelement <2 x float> %x, i32 1
  %x0x0 = fmul float %x0, %x0
  %x1x1 = fmul float %x1, %x1
  %add = fadd float %x0x0, %x1x1
  store float %add, float* @a
  ; %x0 is returned as a scalar: this is the use outside the fmul/fadd chain.
  ret float %x0
}

; Variant in which the first product is %x0 * %x1 rather than %x0 * %x0, so %x1
; is an operand of both fmuls. Only the THRESH1 run has assertions for this
; function: they expect lane 1 of %x to be extracted once, placed into both
; lanes of a new vector via two insertelements, and multiplied with %x in a
; single <2 x float> fmul.
define float @f_used_twice_in_tree(<2 x float> %x) {
; THRESH1-LABEL: @f_used_twice_in_tree(
; THRESH1-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
; THRESH1-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
; THRESH1-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1
; THRESH1-NEXT:    [[TMP4:%.*]] = fmul <2 x float> [[X]], [[TMP3]]
; THRESH1-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
; THRESH1-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
; THRESH1-NEXT:    [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]]
; THRESH1-NEXT:    ret float [[ADD]]
;
  %x0 = extractelement <2 x float> %x, i32 0
  %x1 = extractelement <2 x float> %x, i32 1
  ; Lane 0 is multiplied by lane 1 here (not squared, unlike @f).
  %x0x0 = fmul float %x0, %x1
  %x1x1 = fmul float %x1, %x1
  %add = fadd float %x0x0, %x1x1
  ret float %add
}