If we vectorize, e.g., a store, we leave behind a bunch of getelementptrs for the individual scalar stores which we removed. We can go ahead and delete them as well. This is purely for test output quality and readability. It should have no effect in any sane pipeline. Differential Revision: https://reviews.llvm.org/D122493
95 lines
4.2 KiB
LLVM
95 lines
4.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; Test driver: runs opt with the SLP vectorizer enabled and -slp-threshold=-10,
; prints textual IR (-S), and pipes it to FileCheck against this same file.
; RUN: opt -S -slp-vectorizer %s -slp-threshold=-10 | FileCheck %s
|
|
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
|
target triple = "aarch64--linux-gnu"
|
|
|
|
; %structA wraps a two-element float array; both test functions load
; elements 0 and 1 of that array through a %structA pointer.
%structA = type { [2 x float] }
|
|
|
|
; @test1
;
; Scalar input: computes
;   (float(%xmin) - J[0])^2 + (float(%ymin) - J[1])^2
; with `fast` float arithmetic, compares the sum against 0.0 with `fcmp oeq`,
; and branches back to the same block when the comparison is true.
;
; Expected output (see the autogenerated assertions below): %xmin and %ymin are
; packed into a <2 x i32> in the entry block, converted with one vector sitofp,
; and combined with a single <2 x float> load through %arrayidx4. The vector
; fsub/fmul results are then extracted lane by lane and summed with a scalar
; fadd (lane 0 first, lane 1 second). The assertions form an unbroken
; "next line" chain, so no getelementptr for element 1 may remain in the
; output.
define void @test1(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
|
|
; CHECK-LABEL: @test1(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[XMIN:%.*]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[YMIN:%.*]], i32 1
|
|
; CHECK-NEXT: br label [[FOR_BODY3_LR_PH:%.*]]
|
|
; CHECK: for.body3.lr.ph:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
|
|
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [[STRUCTA:%.*]], %structA* [[J:%.*]], i64 0, i32 0, i64 0
|
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX4]] to <2 x float>*
|
|
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
|
|
; CHECK-NEXT: [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
|
|
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1
|
|
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP7]], [[TMP8]]
|
|
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ADD]], 0.000000e+00
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY3_LR_PH]], label [[FOR_END27:%.*]]
|
|
; CHECK: for.end27:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
|
|
entry:
|
|
br label %for.body3.lr.ph
|
|
|
|
for.body3.lr.ph:
|
|
; Convert both integer arguments to float.
%conv5 = sitofp i32 %ymin to float
|
|
%conv = sitofp i32 %xmin to float
|
|
; First scalar chain: float(%xmin) minus element 0 of the array in %J.
%arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
|
|
%0 = load float, float* %arrayidx4, align 4
|
|
%sub = fsub fast float %conv, %0
|
|
; Second scalar chain: float(%ymin) minus element 1 of the array in %J.
%arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
|
|
%1 = load float, float* %arrayidx9, align 4
|
|
%sub10 = fsub fast float %conv5, %1
|
|
; Square each difference, then sum the squares (element-0 term first).
%mul11 = fmul fast float %sub, %sub
|
|
%mul12 = fmul fast float %sub10, %sub10
|
|
%add = fadd fast float %mul11, %mul12
|
|
; Branch back to this block when the sum compares equal to 0.0; otherwise exit.
%cmp = fcmp oeq float %add, 0.000000e+00
|
|
br i1 %cmp, label %for.body3.lr.ph, label %for.end27
|
|
|
|
for.end27:
|
|
ret void
|
|
}
|
|
|
|
; @test2
;
; Same scalar computation as @test1:
;   (float(%xmin) - J[0])^2 + (float(%ymin) - J[1])^2
; except that the operands of the final scalar fadd are swapped (%mul12 first,
; %mul11 second).
;
; Expected output (see the autogenerated assertions below): the same vector
; sequence as in @test1 (one <2 x i32> sitofp, one <2 x float> load through
; %arrayidx4, vector fsub and fmul, two extractelements), but the final scalar
; fadd takes the lane-1 extract first and the lane-0 extract second, mirroring
; the swapped operands in the input. As in @test1, the unbroken "next line"
; chain leaves no room for a leftover getelementptr for element 1.
define void @test2(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
|
|
; CHECK-LABEL: @test2(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[XMIN:%.*]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[YMIN:%.*]], i32 1
|
|
; CHECK-NEXT: br label [[FOR_BODY3_LR_PH:%.*]]
|
|
; CHECK: for.body3.lr.ph:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
|
|
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [[STRUCTA:%.*]], %structA* [[J:%.*]], i64 0, i32 0, i64 0
|
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX4]] to <2 x float>*
|
|
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
|
|
; CHECK-NEXT: [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
|
|
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1
|
|
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP8]], [[TMP7]]
|
|
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ADD]], 0.000000e+00
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY3_LR_PH]], label [[FOR_END27:%.*]]
|
|
; CHECK: for.end27:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
|
|
entry:
|
|
br label %for.body3.lr.ph
|
|
|
|
for.body3.lr.ph:
|
|
; Convert both integer arguments to float.
%conv5 = sitofp i32 %ymin to float
|
|
%conv = sitofp i32 %xmin to float
|
|
; First scalar chain: float(%xmin) minus element 0 of the array in %J.
%arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
|
|
%0 = load float, float* %arrayidx4, align 4
|
|
%sub = fsub fast float %conv, %0
|
|
; Second scalar chain: float(%ymin) minus element 1 of the array in %J.
%arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
|
|
%1 = load float, float* %arrayidx9, align 4
|
|
%sub10 = fsub fast float %conv5, %1
|
|
; Square each difference.
%mul11 = fmul fast float %sub, %sub
|
|
%mul12 = fmul fast float %sub10, %sub10
|
|
; Sum the squares with the element-1 term first; this operand order is the
; only difference from @test1.
%add = fadd fast float %mul12, %mul11 ;;;<---- Operands commuted!!
|
|
; Branch back to this block when the sum compares equal to 0.0; otherwise exit.
%cmp = fcmp oeq float %add, 0.000000e+00
|
|
br i1 %cmp, label %for.body3.lr.ph, label %for.end27
|
|
|
|
for.end27:
|
|
ret void
|
|
}
|