This patch makes SLP and LV emit operations whose initial vectors are set to a poison constant instead of undef. This is part of the effort to use a poison vector instead of undef to represent a "don't care" vector. The goal is to make nice shufflevector optimizations valid that are currently incorrect due to the tricky interaction between undef and poison (see https://bugs.llvm.org/show_bug.cgi?id=44185 ). Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D94061
97 lines
4.4 KiB
LLVM
97 lines
4.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -slp-vectorizer %s -slp-threshold=-10 | FileCheck %s

; The RUN line above feeds this module through the SLP vectorizer (with
; -slp-threshold=-10) and pipes the printed IR to FileCheck, which compares it
; against the assertion comments embedded in each function below.

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

; A struct wrapping a two-element float array; both tests load its two
; adjacent elements through a pointer to this type.
%structA = type { [2 x float] }
; @test1: scalar input computing
;   (float(%xmin) - J[0])^2 + (float(%ymin) - J[1])^2
; where J[0] and J[1] are the two floats of the array inside %J. The two
; squares are summed as `fadd %mul11, %mul12`, and the block loops back to
; itself while that sum compares equal (oeq) to 0.0.
;
; The assertions below expect the vectorized form: the two i32 arguments are
; packed into a <2 x i32> with insertelement (starting from poison) in the
; entry block, converted/subtracted/squared as <2 x float>, and the two lanes
; are then extracted and added with lane 0 as the first fadd operand.
define void @test1(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[XMIN:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[YMIN:%.*]], i32 1
; CHECK-NEXT: br label [[FOR_BODY3_LR_PH:%.*]]
; CHECK: for.body3.lr.ph:
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [[STRUCTA:%.*]], %structA* [[J:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCTA]], %structA* [[J]], i64 0, i32 0, i64 1
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX4]] to <2 x float>*
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ADD]], 0.000000e+00
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY3_LR_PH]], label [[FOR_END27:%.*]]
; CHECK: for.end27:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body3.lr.ph

for.body3.lr.ph:
  ; Convert both integer arguments to float (%ymin first, then %xmin).
  %conv5 = sitofp i32 %ymin to float
  %conv = sitofp i32 %xmin to float
  ; Lane 0: float(%xmin) - J[0]
  %arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
  %0 = load float, float* %arrayidx4, align 4
  %sub = fsub fast float %conv, %0
  ; Lane 1: float(%ymin) - J[1]
  %arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
  %1 = load float, float* %arrayidx9, align 4
  %sub10 = fsub fast float %conv5, %1
  ; Square each difference and sum them, lane 0 product first.
  %mul11 = fmul fast float %sub, %sub
  %mul12 = fmul fast float %sub10, %sub10
  %add = fadd fast float %mul11, %mul12
  ; Repeat this block while the sum is exactly 0.0; otherwise leave the loop.
  %cmp = fcmp oeq float %add, 0.000000e+00
  br i1 %cmp, label %for.body3.lr.ph, label %for.end27

for.end27:
  ret void
}
; @test2: same scalar computation as @test1,
;   (float(%xmin) - J[0])^2 + (float(%ymin) - J[1])^2
; except that the final scalar add takes its operands in the opposite order
; (`fadd %mul12, %mul11`).
;
; The assertions below expect the same <2 x float> vectorized body as in
; @test1, with the commuted order carried through to the output: the lane 1
; extract is the first fadd operand and the lane 0 extract is the second.
define void @test2(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[XMIN:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[YMIN:%.*]], i32 1
; CHECK-NEXT: br label [[FOR_BODY3_LR_PH:%.*]]
; CHECK: for.body3.lr.ph:
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [[STRUCTA:%.*]], %structA* [[J:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCTA]], %structA* [[J]], i64 0, i32 0, i64 1
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX4]] to <2 x float>*
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ADD]], 0.000000e+00
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY3_LR_PH]], label [[FOR_END27:%.*]]
; CHECK: for.end27:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body3.lr.ph

for.body3.lr.ph:
  ; Convert both integer arguments to float (%ymin first, then %xmin).
  %conv5 = sitofp i32 %ymin to float
  %conv = sitofp i32 %xmin to float
  ; Lane 0: float(%xmin) - J[0]
  %arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
  %0 = load float, float* %arrayidx4, align 4
  %sub = fsub fast float %conv, %0
  ; Lane 1: float(%ymin) - J[1]
  %arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
  %1 = load float, float* %arrayidx9, align 4
  %sub10 = fsub fast float %conv5, %1
  ; Square each difference.
  %mul11 = fmul fast float %sub, %sub
  %mul12 = fmul fast float %sub10, %sub10
  %add = fadd fast float %mul12, %mul11 ; operands commuted relative to @test1
  ; Repeat this block while the sum is exactly 0.0; otherwise leave the loop.
  %cmp = fcmp oeq float %add, 0.000000e+00
  br i1 %cmp, label %for.body3.lr.ph, label %for.end27

for.end27:
  ret void
}