Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll
Juneyoung Lee 4a8e6ed2f7 [SLP,LV] Use poison constant vector for shufflevector/initial insertelement
This patch makes SLP and LV emit operations with initial vectors set to poison constant instead of undef.
This is part of the effort to use poison vectors instead of undef to represent "don't care" vectors.
The goal is to make valid the nice shufflevector optimizations that are currently incorrect due to the tricky interaction between undef and poison (see https://bugs.llvm.org/show_bug.cgi?id=44185).

Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D94061
2021-01-06 11:22:50 +09:00

97 lines
4.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -slp-vectorizer %s -slp-threshold=-10 | FileCheck %s
; The command above runs opt with only the SLP vectorizer pass on this file,
; using a negative -slp-threshold, and pipes the textual IR into FileCheck.
; Review note: the negative threshold presumably lets vectorization happen even
; when the cost model reports no gain -- confirm against the SLPVectorizer docs.
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
; %structA wraps an array of two floats; the functions in this file read both
; elements through getelementptr indices 0 and 1.
%structA = type { [2 x float] }
; @test1: two parallel scalar chains. Each converts one i32 argument to float,
; subtracts one float loaded from %J, and squares the difference; the two
; squares are then added and compared against 0.0 to decide whether to loop.
; The assertions below expect both chains to be merged into <2 x ...>
; operations (sitofp, load, fsub, fmul), with the <2 x i32> input built in the
; entry block starting from a poison vector, and the final fadd/fcmp kept
; scalar on the two extracted lanes.
define void @test1(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[XMIN:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[YMIN:%.*]], i32 1
; CHECK-NEXT: br label [[FOR_BODY3_LR_PH:%.*]]
; CHECK: for.body3.lr.ph:
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [[STRUCTA:%.*]], %structA* [[J:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCTA]], %structA* [[J]], i64 0, i32 0, i64 1
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX4]] to <2 x float>*
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ADD]], 0.000000e+00
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY3_LR_PH]], label [[FOR_END27:%.*]]
; CHECK: for.end27:
; CHECK-NEXT: ret void
;
entry:
br label %for.body3.lr.ph
for.body3.lr.ph:
; Scalar int-to-float conversions of the two i32 arguments.
%conv5 = sitofp i32 %ymin to float
%conv = sitofp i32 %xmin to float
; Element 0 of %J's float array is subtracted from the converted %xmin.
%arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
%0 = load float, float* %arrayidx4, align 4
%sub = fsub fast float %conv, %0
; Element 1 of %J's float array is subtracted from the converted %ymin.
%arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
%1 = load float, float* %arrayidx9, align 4
%sub10 = fsub fast float %conv5, %1
; Square each difference.
%mul11 = fmul fast float %sub, %sub
%mul12 = fmul fast float %sub10, %sub10
; Sum of the squares, with %mul11 (the element-0 chain) as the first operand.
%add = fadd fast float %mul11, %mul12
; Branch back to this block while the sum compares ordered-equal to 0.0.
%cmp = fcmp oeq float %add, 0.000000e+00
br i1 %cmp, label %for.body3.lr.ph, label %for.end27
for.end27:
ret void
}
; @test2: same computation as @test1, except that the final scalar fadd takes
; %mul12 as its first operand and %mul11 as its second. The assertions below
; expect the same <2 x ...> vectorized body as in @test1, with the two
; extracted lanes appearing in the fadd in swapped order (lane 1, then lane 0).
define void @test2(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[XMIN:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[YMIN:%.*]], i32 1
; CHECK-NEXT: br label [[FOR_BODY3_LR_PH:%.*]]
; CHECK: for.body3.lr.ph:
; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [[STRUCTA:%.*]], %structA* [[J:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCTA]], %structA* [[J]], i64 0, i32 0, i64 1
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX4]] to <2 x float>*
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ADD]], 0.000000e+00
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY3_LR_PH]], label [[FOR_END27:%.*]]
; CHECK: for.end27:
; CHECK-NEXT: ret void
;
entry:
br label %for.body3.lr.ph
for.body3.lr.ph:
; Scalar int-to-float conversions of the two i32 arguments.
%conv5 = sitofp i32 %ymin to float
%conv = sitofp i32 %xmin to float
; Element 0 of %J's float array is subtracted from the converted %xmin.
%arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
%0 = load float, float* %arrayidx4, align 4
%sub = fsub fast float %conv, %0
; Element 1 of %J's float array is subtracted from the converted %ymin.
%arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
%1 = load float, float* %arrayidx9, align 4
%sub10 = fsub fast float %conv5, %1
; Square each difference.
%mul11 = fmul fast float %sub, %sub
%mul12 = fmul fast float %sub10, %sub10
%add = fadd fast float %mul12, %mul11 ; Operands commuted relative to @test1: %mul12 comes first here.
; Branch back to this block while the sum compares ordered-equal to 0.0.
%cmp = fcmp oeq float %add, 0.000000e+00
br i1 %cmp, label %for.body3.lr.ph, label %for.end27
for.end27:
ret void
}