Refresh the generic scheduling model to use A510 instead of A55. The main benefits are for the little core and the introduction of SVE scheduling information. The changes were tested on various OoO cores and no performance degradation was seen. Differential Revision: https://reviews.llvm.org/D156799
215 lines
8.0 KiB
LLVM
215 lines
8.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
|
|
|
|
; The following test cases check that repeated divisions by a common divisor
;   a / D; b / D; c / D;
; are rewritten as one reciprocal followed by multiplications:
;   recip = 1.0 / D; a * recip; b * recip; c * recip;
|
|
; Three scalar float divisions sharing the divisor %D, compiled with attribute
; group #0. The assertions below expect one fdiv that computes 1.0 / D,
; followed by three fmuls and a tail branch to foo_3f.
define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
; CHECK-LABEL: three_fdiv_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov s4, #1.00000000
; CHECK-NEXT:    fdiv s4, s4, s0
; CHECK-NEXT:    fmul s0, s1, s4
; CHECK-NEXT:    fmul s1, s2, s4
; CHECK-NEXT:    fmul s2, s3, s4
; CHECK-NEXT:    b foo_3f
  %div = fdiv float %a, %D
  %div1 = fdiv float %b, %D
  %div2 = fdiv float %c, %D
  tail call void @foo_3f(float %div, float %div1, float %div2)
  ret void
}
|
|
|
|
; Three scalar double divisions sharing the divisor %D, compiled with attribute
; group #0. The assertions below expect one fdiv that computes 1.0 / D,
; followed by three fmuls and a tail branch to foo_3d.
define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
; CHECK-LABEL: three_fdiv_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d4, #1.00000000
; CHECK-NEXT:    fdiv d4, d4, d0
; CHECK-NEXT:    fmul d0, d1, d4
; CHECK-NEXT:    fmul d1, d2, d4
; CHECK-NEXT:    fmul d2, d3, d4
; CHECK-NEXT:    b foo_3d
  %div = fdiv double %a, %D
  %div1 = fdiv double %b, %D
  %div2 = fdiv double %c, %D
  tail call void @foo_3d(double %div, double %div1, double %div2)
  ret void
}
|
|
|
|
; Three <4 x float> divisions sharing the vector divisor %D, compiled with
; attribute group #0. The assertions below expect one vector fdiv of a 1.0
; constant by %D, followed by three vector fmuls and a tail branch to foo_3_4xf.
define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; CHECK-LABEL: three_fdiv_4xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov v4.4s, #1.00000000
; CHECK-NEXT:    fdiv v4.4s, v4.4s, v0.4s
; CHECK-NEXT:    fmul v0.4s, v1.4s, v4.4s
; CHECK-NEXT:    fmul v1.4s, v2.4s, v4.4s
; CHECK-NEXT:    fmul v2.4s, v3.4s, v4.4s
; CHECK-NEXT:    b foo_3_4xf
  %div = fdiv <4 x float> %a, %D
  %div1 = fdiv <4 x float> %b, %D
  %div2 = fdiv <4 x float> %c, %D
  tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
  ret void
}
|
|
|
|
; Three <2 x double> divisions sharing the vector divisor %D, compiled with
; attribute group #0. The assertions below expect one vector fdiv of a 1.0
; constant by %D, followed by three vector fmuls and a tail branch to foo_3_2xd.
define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
; CHECK-LABEL: three_fdiv_2xdouble:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov v4.2d, #1.00000000
; CHECK-NEXT:    fdiv v4.2d, v4.2d, v0.2d
; CHECK-NEXT:    fmul v0.2d, v1.2d, v4.2d
; CHECK-NEXT:    fmul v1.2d, v2.2d, v4.2d
; CHECK-NEXT:    fmul v2.2d, v3.2d, v4.2d
; CHECK-NEXT:    b foo_3_2xd
  %div = fdiv <2 x double> %a, %D
  %div1 = fdiv <2 x double> %b, %D
  %div2 = fdiv <2 x double> %c, %D
  tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
  ret void
}
|
|
|
|
; The following test cases check that two FDIVs are never combined when
; neither of them calculates a reciprocal.
|
|
; Only two scalar float divisions share the divisor %D here. The assertions
; below expect both fdiv instructions to be kept (no reciprocal is formed),
; followed by a register move and a tail branch to foo_2f.
define void @two_fdiv_float(float %D, float %a, float %b) #0 {
; CHECK-LABEL: two_fdiv_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fdiv s3, s1, s0
; CHECK-NEXT:    fdiv s1, s2, s0
; CHECK-NEXT:    fmov s0, s3
; CHECK-NEXT:    b foo_2f
  %div = fdiv float %a, %D
  %div1 = fdiv float %b, %D
  tail call void @foo_2f(float %div, float %div1)
  ret void
}
|
|
|
|
; Only two scalar double divisions share the divisor %D here. The assertions
; below expect both fdiv instructions to be kept (no reciprocal is formed),
; followed by a register move and a tail branch to foo_2d.
define void @two_fdiv_double(double %D, double %a, double %b) #0 {
; CHECK-LABEL: two_fdiv_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fdiv d3, d1, d0
; CHECK-NEXT:    fdiv d1, d2, d0
; CHECK-NEXT:    fmov d0, d3
; CHECK-NEXT:    b foo_2d
  %div = fdiv double %a, %D
  %div1 = fdiv double %b, %D
  tail call void @foo_2d(double %div, double %div1)
  ret void
}
|
|
|
|
; Three <4 x float> divisions whose common divisor is the scalar %D splatted
; across all lanes (insertelement + zero-mask shufflevector). The assertions
; below expect a dup of %D, one vector fdiv of 1.0 by that splat, three vector
; fmuls and a tail branch to foo_3_4xf.
define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; CHECK-LABEL: splat_three_fdiv_4xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    fmov v4.4s, #1.00000000
; CHECK-NEXT:    dup v0.4s, v0.s[0]
; CHECK-NEXT:    fdiv v4.4s, v4.4s, v0.4s
; CHECK-NEXT:    fmul v0.4s, v1.4s, v4.4s
; CHECK-NEXT:    fmul v1.4s, v2.4s, v4.4s
; CHECK-NEXT:    fmul v2.4s, v3.4s, v4.4s
; CHECK-NEXT:    b foo_3_4xf
  %D.ins = insertelement <4 x float> poison, float %D, i64 0
  %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
  %div = fdiv <4 x float> %a, %splat
  %div1 = fdiv <4 x float> %b, %splat
  %div2 = fdiv <4 x float> %c, %splat
  tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
  ret void
}
|
|
|
|
; A single <4 x float> division by a splat of the scalar %D, compiled with
; attribute group #1. The assertions below expect a dup of %D, a vector fdiv
; of 1.0 by that splat, and one vector fmul producing the returned value.
define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
; CHECK-LABEL: splat_fdiv_v4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    fmov v2.4s, #1.00000000
; CHECK-NEXT:    dup v0.4s, v0.s[0]
; CHECK-NEXT:    fdiv v0.4s, v2.4s, v0.4s
; CHECK-NEXT:    fmul v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    ret
entry:
  %D.ins = insertelement <4 x float> poison, float %D, i64 0
  %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
  %div = fdiv <4 x float> %a, %splat
  ret <4 x float> %div
}
|
|
|
|
; A single scalable <vscale x 4 x float> division by a splat of the scalar %D,
; compiled with attribute group #1. The assertions below expect the reciprocal
; to be computed with a scalar fdiv, then broadcast into a z register and
; applied with one fmul.
define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #1 {
; CHECK-LABEL: splat_fdiv_nxv4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov s2, #1.00000000
; CHECK-NEXT:    fdiv s0, s2, s0
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    fmul z0.s, z1.s, z0.s
; CHECK-NEXT:    ret
entry:
  %D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
  %splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %div = fdiv <vscale x 4 x float> %a, %splat
  ret <vscale x 4 x float> %div
}
|
|
|
|
; Three scalable <vscale x 4 x float> divisions by the same splat of the
; scalar %D, compiled with attribute group #1. The assertions below expect one
; scalar fdiv computing the reciprocal, a broadcast into a z register, three
; fmuls and a tail branch to foo_3_nxv4f32.
define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #1 {
; CHECK-LABEL: splat_three_fdiv_nxv4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov s4, #1.00000000
; CHECK-NEXT:    fdiv s0, s4, s0
; CHECK-NEXT:    mov z4.s, s0
; CHECK-NEXT:    fmul z0.s, z1.s, z4.s
; CHECK-NEXT:    fmul z1.s, z2.s, z4.s
; CHECK-NEXT:    fmul z2.s, z3.s, z4.s
; CHECK-NEXT:    b foo_3_nxv4f32
entry:
  %D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
  %splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %div = fdiv <vscale x 4 x float> %a, %splat
  %div1 = fdiv <vscale x 4 x float> %b, %splat
  %div2 = fdiv <vscale x 4 x float> %c, %splat
  tail call void @foo_3_nxv4f32(<vscale x 4 x float> %div, <vscale x 4 x float> %div1, <vscale x 4 x float> %div2)
  ret void
}
|
|
|
|
; A single scalable <vscale x 2 x double> division by a splat of the scalar
; %D, compiled with attribute group #1. The assertions below expect the
; divisor to be broadcast and one predicated fdivr to be emitted; no
; reciprocal-and-multiply sequence appears in the expected output.
define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #1 {
; CHECK-LABEL: splat_fdiv_nxv2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    fdivr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
entry:
  %D.ins = insertelement <vscale x 2 x double> poison, double %D, i64 0
  %splat = shufflevector <vscale x 2 x double> %D.ins, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
  %div = fdiv <vscale x 2 x double> %a, %splat
  ret <vscale x 2 x double> %div
}
|
|
|
|
; Two scalable <vscale x 2 x double> divisions by the same splat of the scalar
; %D, compiled with attribute group #1. The assertions below expect one scalar
; fdiv computing the reciprocal, a broadcast into a z register, two fmuls and
; a tail branch to foo_2_nxv2f64.
define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
; CHECK-LABEL: splat_two_fdiv_nxv2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov d3, #1.00000000
; CHECK-NEXT:    fdiv d0, d3, d0
; CHECK-NEXT:    mov z3.d, d0
; CHECK-NEXT:    fmul z0.d, z1.d, z3.d
; CHECK-NEXT:    fmul z1.d, z2.d, z3.d
; CHECK-NEXT:    b foo_2_nxv2f64
entry:
  %D.ins = insertelement <vscale x 2 x double> poison, double %D, i64 0
  %splat = shufflevector <vscale x 2 x double> %D.ins, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
  %div = fdiv <vscale x 2 x double> %a, %splat
  %div1 = fdiv <vscale x 2 x double> %b, %splat
  tail call void @foo_2_nxv2f64(<vscale x 2 x double> %div, <vscale x 2 x double> %div1)
  ret void
}
|
|
|
|
; External functions that receive the computed quotients through the tail
; calls in the test functions of this file.
declare void @foo_3f(float, float, float)
declare void @foo_3d(double, double, double)
declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
declare void @foo_2f(float, float)
declare void @foo_2d(double, double)
declare void @foo_3_nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @foo_2_nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
|
|
|
|
; #0 sets "unsafe-fp-math" to "true".
; #1 sets the same flag and additionally enables the +sve target feature.
attributes #0 = { "unsafe-fp-math"="true" }
attributes #1 = { "unsafe-fp-math"="true" "target-features"="+sve" }
|