I propose that we go ahead and enable SLP by default. Over the last few weeks, @luke and I have been working through codegen issues seen at small VLs from a couple of SPEC workloads. We still have a ways to go to get optimal codegen, but we're at the point where having a single configuration we're all tuning against is probably the right default.
As a bit of history, I introduced this TTI hook in a310637132 in August of last year to unblock enabling LoopVectorizer. At the time, we had a couple of known issues: constant materialization, address generation, and a general lack of maturity of small fixed vector codegen. By now, each of these has had significant investment. I can't say any of them are completely fixed, but we're no longer seeing instances of them every place we look.
What we're mostly seeing at this point is a long tail of codegen opportunities, many involving build vectors, shuffles, and extract patterns. I have a couple of patches up to continue iterating on those issues, but I don't think they need to be blockers for enabling SLP.
Differential Revision: https://reviews.llvm.org/D152750
337 lines
13 KiB
LLVM
337 lines
13 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
; RUN: -riscv-v-vector-bits-min=-1 -riscv-v-slp-max-vf=0 \
; RUN: | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
; RUN: | FileCheck %s --check-prefix=DEFAULT

; The first invocation passes explicit -riscv-v-vector-bits-min and
; -riscv-v-slp-max-vf overrides and is verified with the standard prefix.
; The second invocation passes neither option and is verified with the
; DEFAULT prefix, so the two sets of assertions can be compared side by side.
|
|
|
|
; Four adjacent float loads from %p and four from %q are added lane by lane
; and written to four adjacent slots of %dst. Both test configurations expect
; the scalar code to become one <4 x float> load/load/fadd/store sequence.
define void @fp_add(ptr %dst, ptr %p, ptr %q) {
; CHECK-LABEL: define void @fp_add
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_add
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; DEFAULT-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
; DEFAULT-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  %f0 = load float, ptr %q, align 4
  %pf1 = getelementptr inbounds float, ptr %q, i64 1
  %f1 = load float, ptr %pf1, align 4
  %pf2 = getelementptr inbounds float, ptr %q, i64 2
  %f2 = load float, ptr %pf2, align 4
  %pf3 = getelementptr inbounds float, ptr %q, i64 3
  %f3 = load float, ptr %pf3, align 4

  %a0 = fadd float %e0, %f0
  %a1 = fadd float %e1, %f1
  %a2 = fadd float %e2, %f2
  %a3 = fadd float %e3, %f3

  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}
|
|
|
|
; Four adjacent floats loaded from %p each have the constant 3.0 subtracted
; and are written to four adjacent slots of %dst. Both test configurations
; expect a single <4 x float> load, an fsub against a splat of 3.0, and a
; single vector store.
define void @fp_sub(ptr %dst, ptr %p) {
; CHECK-LABEL: define void @fp_sub
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_sub
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
; DEFAULT-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  %a0 = fsub float %e0, 3.0
  %a1 = fsub float %e1, 3.0
  %a2 = fsub float %e2, 3.0
  %a3 = fsub float %e3, 3.0

  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}
|
|
|
|
; Four adjacent float loads from %p and four from %q are multiplied lane by
; lane and written to four adjacent slots of %dst. Both test configurations
; expect one <4 x float> load/load/fmul/store sequence.
define void @fp_mul(ptr %dst, ptr %p, ptr %q) {
; CHECK-LABEL: define void @fp_mul
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_mul
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; DEFAULT-NEXT:    [[TMP2:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]]
; DEFAULT-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  %f0 = load float, ptr %q, align 4
  %pf1 = getelementptr inbounds float, ptr %q, i64 1
  %f1 = load float, ptr %pf1, align 4
  %pf2 = getelementptr inbounds float, ptr %q, i64 2
  %f2 = load float, ptr %pf2, align 4
  %pf3 = getelementptr inbounds float, ptr %q, i64 3
  %f3 = load float, ptr %pf3, align 4

  %a0 = fmul float %e0, %f0
  %a1 = fmul float %e1, %f1
  %a2 = fmul float %e2, %f2
  %a3 = fmul float %e3, %f3

  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}
|
|
|
|
; Four adjacent floats loaded from %p are each divided by the constant 10.5
; and written to four adjacent slots of %dst. Both test configurations expect
; a single <4 x float> load, an fdiv by a splat of 10.5, and a single vector
; store.
define void @fp_div(ptr %dst, ptr %p) {
; CHECK-LABEL: define void @fp_div
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], <float 1.050000e+01, float 1.050000e+01, float 1.050000e+01, float 1.050000e+01>
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_div
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], <float 1.050000e+01, float 1.050000e+01, float 1.050000e+01, float 1.050000e+01>
; DEFAULT-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  %a0 = fdiv float %e0, 10.5
  %a1 = fdiv float %e1, 10.5
  %a2 = fdiv float %e2, 10.5
  %a3 = fdiv float %e3, 10.5

  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}
|
|
|
|
declare float @llvm.maxnum.f32(float, float)

; Four adjacent float loads from %p and four from %q are combined pairwise
; with the scalar llvm.maxnum.f32 intrinsic and written to four adjacent
; slots of %dst. Both test configurations expect two <4 x float> loads, one
; call to llvm.maxnum.v4f32, and one vector store.
define void @fp_max(ptr %dst, ptr %p, ptr %q) {
; CHECK-LABEL: define void @fp_max
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_max
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; DEFAULT-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
; DEFAULT-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  %f0 = load float, ptr %q, align 4
  %pf1 = getelementptr inbounds float, ptr %q, i64 1
  %f1 = load float, ptr %pf1, align 4
  %pf2 = getelementptr inbounds float, ptr %q, i64 2
  %f2 = load float, ptr %pf2, align 4
  %pf3 = getelementptr inbounds float, ptr %q, i64 3
  %f3 = load float, ptr %pf3, align 4

  %a0 = tail call float @llvm.maxnum.f32(float %e0, float %f0)
  %a1 = tail call float @llvm.maxnum.f32(float %e1, float %f1)
  %a2 = tail call float @llvm.maxnum.f32(float %e2, float %f2)
  %a3 = tail call float @llvm.maxnum.f32(float %e3, float %f3)

  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}
|
|
|
|
declare float @llvm.minnum.f32(float, float)

; Four adjacent floats loaded from %p are each passed with the constant 1.25
; to the scalar llvm.minnum.f32 intrinsic and written to four adjacent slots
; of %dst. Both test configurations expect one <4 x float> load, one call to
; llvm.minnum.v4f32 against a splat of 1.25, and one vector store.
define void @fp_min(ptr %dst, ptr %p) {
; CHECK-LABEL: define void @fp_min
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> <float 1.250000e+00, float 1.250000e+00, float 1.250000e+00, float 1.250000e+00>)
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_min
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> <float 1.250000e+00, float 1.250000e+00, float 1.250000e+00, float 1.250000e+00>)
; DEFAULT-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  %a0 = tail call float @llvm.minnum.f32(float %e0, float 1.25)
  %a1 = tail call float @llvm.minnum.f32(float %e1, float 1.25)
  %a2 = tail call float @llvm.minnum.f32(float %e2, float 1.25)
  %a3 = tail call float @llvm.minnum.f32(float %e3, float 1.25)

  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}
|
|
|
|
declare i32 @llvm.fptosi.sat.i32.f32(float)

; Four adjacent floats loaded from %p are each converted with the scalar
; llvm.fptosi.sat.i32.f32 intrinsic and the i32 results are written to four
; adjacent i32 slots of %dst. Both test configurations expect one
; <4 x float> load, one call to llvm.fptosi.sat.v4i32.v4f32, and one
; <4 x i32> store.
define void @fp_convert(ptr %dst, ptr %p) {
; CHECK-LABEL: define void @fp_convert
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_convert
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]])
; DEFAULT-NEXT:    store <4 x i32> [[TMP1]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  %a0 = tail call i32 @llvm.fptosi.sat.i32.f32(float %e0)
  %a1 = tail call i32 @llvm.fptosi.sat.i32.f32(float %e1)
  %a2 = tail call i32 @llvm.fptosi.sat.i32.f32(float %e2)
  %a3 = tail call i32 @llvm.fptosi.sat.i32.f32(float %e3)

  store i32 %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %a3, ptr %pa3, align 4

  ret void
}
|