Files
clang-p2996/llvm/test/CodeGen/AArch64/faddp.ll
David Green 68f34e4d39 [AArch64] Add tablegen patterns for faddp of two extracts
This adds some simple tablegen patterns for converting
`faddp v2f32 extractlow(Rn), v2f32 extracthigh(Rn)` to
`faddp v4f32 Rn, v4f32 Rn` using the q variants of the
instructions, avoiding the extra ext needed to extract
the high lanes. Only the bottom lanes of the new faddp
are used, the second Rn operand is used as a placeholder.
It uses Rn to prevent any false dependencies, but could
equally be undef.

Differential Revision: https://reviews.llvm.org/D152245
2023-06-19 07:48:31 +01:00

298 lines
11 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple aarch64 -mattr=+fullfp16 < %s | FileCheck %s
; Sum of both lanes of a <2 x float>: lane 1 is shuffled into lane 0, added to
; the original vector, and lane 0 of the sum is returned. The expected output
; is a single scalar pairwise faddp.
define float @faddp_2xfloat(<2 x float> %a) {
; CHECK-LABEL: faddp_2xfloat:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
entry:
  %lane1 = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 undef>
  %sum = fadd <2 x float> %a, %lane1
  %res = extractelement <2 x float> %sum, i32 0
  ret float %res
}
; Same lane0 + lane1 reduction as the 2-lane case, but on a <4 x float> input.
; Only lane 0 of the sum is extracted; the remaining shuffle mask lanes are
; undef. The expected output is one scalar pairwise faddp on the low two lanes.
define float @faddp_4xfloat(<4 x float> %a) {
; CHECK-LABEL: faddp_4xfloat:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
entry:
  %lane1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %sum = fadd <4 x float> %a, %lane1
  %res = extractelement <4 x float> %sum, i32 0
  ret float %res
}
; Commuted form of the <4 x float> lane0 + lane1 reduction: the shuffled vector
; is the first fadd operand and %a the second. The expected output is the same
; single scalar pairwise faddp.
define float @faddp_4xfloat_commute(<4 x float> %a) {
; CHECK-LABEL: faddp_4xfloat_commute:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
entry:
  %lane1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %sum = fadd <4 x float> %lane1, %a
  %res = extractelement <4 x float> %sum, i32 0
  ret float %res
}
; Commuted form of the <2 x float> reduction: the shuffled vector is the first
; fadd operand and %a the second. The expected output is still a single scalar
; pairwise faddp.
define float @faddp_2xfloat_commute(<2 x float> %a) {
; CHECK-LABEL: faddp_2xfloat_commute:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
entry:
  %lane1 = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 undef>
  %sum = fadd <2 x float> %lane1, %a
  %res = extractelement <2 x float> %sum, i32 0
  ret float %res
}
; Sum of both lanes of a <2 x double>, written as shuffle + fadd + extract of
; lane 0. The expected output is the double-precision scalar pairwise faddp.
define double @faddp_2xdouble(<2 x double> %a) {
; CHECK-LABEL: faddp_2xdouble:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: faddp d0, v0.2d
; CHECK-NEXT: ret
entry:
  %lane1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  %sum = fadd <2 x double> %a, %lane1
  %res = extractelement <2 x double> %sum, i32 0
  ret double %res
}
; Commuted form of the <2 x double> reduction: the shuffled vector is the first
; fadd operand. The expected output is the same scalar pairwise faddp.
define double @faddp_2xdouble_commute(<2 x double> %a) {
; CHECK-LABEL: faddp_2xdouble_commute:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: faddp d0, v0.2d
; CHECK-NEXT: ret
entry:
  %lane1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  %sum = fadd <2 x double> %lane1, %a
  %res = extractelement <2 x double> %sum, i32 0
  ret double %res
}
; Integer counterpart of the lane0 + lane1 reduction on <2 x i64>. The expected
; output is a scalar pairwise addp followed by an fmov that moves the result
; into x0.
define i64 @addp_2xi64(<2 x i64> %a) {
; CHECK-LABEL: addp_2xi64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: addp d0, v0.2d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
entry:
  %lane1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %sum = add <2 x i64> %a, %lane1
  %res = extractelement <2 x i64> %sum, i32 0
  ret i64 %res
}
; Commuted form of the <2 x i64> reduction: the shuffled vector is the first
; add operand. The expected output is unchanged: addp, then fmov into x0.
define i64 @addp_2xi64_commute(<2 x i64> %a) {
; CHECK-LABEL: addp_2xi64_commute:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: addp d0, v0.2d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
entry:
  %lane1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %sum = add <2 x i64> %lane1, %a
  %res = extractelement <2 x i64> %sum, i32 0
  ret i64 %res
}
; Strict-FP variant of the <2 x float> reduction: the add is the constrained
; fadd intrinsic (round.tonearest, fpexcept.strict) and both the function and
; the call carry attribute group #0. The expected output is still a single
; scalar pairwise faddp.
define float @faddp_2xfloat_strict(<2 x float> %a) #0 {
; CHECK-LABEL: faddp_2xfloat_strict:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
entry:
  %lane1 = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 undef>
  %sum = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float> %lane1, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %res = extractelement <2 x float> %sum, i32 0
  ret float %res
}
; Strict-FP variant of the <4 x float> lane0 + lane1 reduction, using the
; constrained fadd intrinsic (round.tonearest, fpexcept.strict). The expected
; output is one scalar pairwise faddp.
define float @faddp_4xfloat_strict(<4 x float> %a) #0 {
; CHECK-LABEL: faddp_4xfloat_strict:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
entry:
  %lane1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %sum = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %a, <4 x float> %lane1, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %res = extractelement <4 x float> %sum, i32 0
  ret float %res
}
; Strict-FP, commuted <4 x float> reduction: the shuffled vector is the first
; argument of the constrained fadd and %a the second. The expected output is
; one scalar pairwise faddp.
define float @faddp_4xfloat_commute_strict(<4 x float> %a) #0 {
; CHECK-LABEL: faddp_4xfloat_commute_strict:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
entry:
  %lane1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %sum = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %lane1, <4 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %res = extractelement <4 x float> %sum, i32 0
  ret float %res
}
; Strict-FP, commuted <2 x float> reduction: the shuffled vector is the first
; argument of the constrained fadd and %a the second. The expected output is
; one scalar pairwise faddp.
define float @faddp_2xfloat_commute_strict(<2 x float> %a) #0 {
; CHECK-LABEL: faddp_2xfloat_commute_strict:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
entry:
  %lane1 = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 undef>
  %sum = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %lane1, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %res = extractelement <2 x float> %sum, i32 0
  ret float %res
}
; Strict-FP variant of the <2 x double> reduction, using the constrained fadd
; intrinsic (round.tonearest, fpexcept.strict). The expected output is the
; double-precision scalar pairwise faddp.
define double @faddp_2xdouble_strict(<2 x double> %a) #0 {
; CHECK-LABEL: faddp_2xdouble_strict:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: faddp d0, v0.2d
; CHECK-NEXT: ret
entry:
  %lane1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  %sum = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %a, <2 x double> %lane1, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %res = extractelement <2 x double> %sum, i32 0
  ret double %res
}
; Strict-FP, commuted <2 x double> reduction: the shuffled vector is the first
; argument of the constrained fadd. The expected output is the scalar pairwise
; faddp on the 2d arrangement.
define double @faddp_2xdouble_commute_strict(<2 x double> %a) #0 {
; CHECK-LABEL: faddp_2xdouble_commute_strict:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: faddp d0, v0.2d
; CHECK-NEXT: ret
entry:
  %lane1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  %sum = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %lane1, <2 x double> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %res = extractelement <2 x double> %sum, i32 0
  ret double %res
}
; Whole-vector case: the two lanes of %a are swapped (mask <1, 0>) and added
; back to %a with a reassoc fadd, and the full <2 x double> is returned. The
; expected output here is ext + fadd rather than a pairwise faddp.
define <2 x double> @addp_v2f64(<2 x double> %a) {
; CHECK-LABEL: addp_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: fadd v0.2d, v1.2d, v0.2d
; CHECK-NEXT: ret
entry:
  %swapped = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 0>
  %sum = fadd reassoc <2 x double> %swapped, %a
  ret <2 x double> %sum
}
; <4 x double> input with adjacent lanes swapped pairwise (mask <1, 0, 3, 2>)
; and added back with a reassoc fadd. The expected output is one vector faddp
; across the two input registers, then a dup of each result lane to rebuild the
; two output registers.
define <4 x double> @addp_v4f64(<4 x double> %a) {
; CHECK-LABEL: addp_v4f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: faddp v1.2d, v0.2d, v1.2d
; CHECK-NEXT: dup v0.2d, v1.d[0]
; CHECK-NEXT: dup v1.2d, v1.d[1]
; CHECK-NEXT: ret
entry:
  %swapped = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  %sum = fadd reassoc <4 x double> %swapped, %a
  ret <4 x double> %sum
}
; Single-register <4 x float> case: adjacent lanes are swapped pairwise and
; added back with a reassoc fadd. The expected output is rev64 + fadd; no
; pairwise faddp is expected here.
define <4 x float> @addp_v4f32(<4 x float> %a) {
; CHECK-LABEL: addp_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev64 v1.4s, v0.4s
; CHECK-NEXT: fadd v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
entry:
  %swapped = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  %sum = fadd reassoc <4 x float> %swapped, %a
  ret <4 x float> %sum
}
; <8 x float> input with adjacent lanes swapped pairwise and added back using a
; plain fadd (no fast-math flags). The expected output keeps a rev64 + fadd
; pair per register and contains no faddp.
; Reviewer note: the fadd here has no reassoc flag, while @addp_v8f32_slow does
; carry reassoc - confirm the two function names are not swapped.
define <8 x float> @addp_v8f32(<8 x float> %a) {
; CHECK-LABEL: addp_v8f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev64 v2.4s, v0.4s
; CHECK-NEXT: rev64 v3.4s, v1.4s
; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s
; CHECK-NEXT: fadd v1.4s, v3.4s, v1.4s
; CHECK-NEXT: ret
entry:
  %swapped = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %sum = fadd <8 x float> %swapped, %a
  ret <8 x float> %sum
}
; <8 x float> input with adjacent lanes swapped pairwise and added back using a
; reassoc fadd. The expected output is one vector faddp across both input
; registers, followed by zip1/zip2 to duplicate each pair sum into both lanes.
; Reviewer note: despite the _slow suffix this is the variant that carries
; reassoc (plain @addp_v8f32 has no flag) - confirm the names are not swapped.
define <8 x float> @addp_v8f32_slow(<8 x float> %a) {
; CHECK-LABEL: addp_v8f32_slow:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: faddp v1.4s, v0.4s, v1.4s
; CHECK-NEXT: zip1 v0.4s, v1.4s, v1.4s
; CHECK-NEXT: zip2 v1.4s, v1.4s, v1.4s
; CHECK-NEXT: ret
entry:
  %swapped = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %sum = fadd reassoc <8 x float> %swapped, %a
  ret <8 x float> %sum
}
; Four-register <16 x float> version of the pairwise-swap + reassoc fadd
; pattern. The expected output is one vector faddp per pair of input registers,
; with zip1/zip2 spreading each pair sum back over the four result registers.
define <16 x float> @addp_v16f32(<16 x float> %a) {
; CHECK-LABEL: addp_v16f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: faddp v3.4s, v2.4s, v3.4s
; CHECK-NEXT: faddp v1.4s, v0.4s, v1.4s
; CHECK-NEXT: zip1 v2.4s, v3.4s, v3.4s
; CHECK-NEXT: zip1 v0.4s, v1.4s, v1.4s
; CHECK-NEXT: zip2 v1.4s, v1.4s, v1.4s
; CHECK-NEXT: zip2 v3.4s, v3.4s, v3.4s
; CHECK-NEXT: ret
entry:
  %swapped = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %sum = fadd reassoc <16 x float> %swapped, %a
  ret <16 x float> %sum
}
; Full reduction of (%a + %b): the <4 x float> sum is split into its low and
; high halves, which feed the v2f32 NEON faddp intrinsic; the two resulting
; lanes are then added via shuffle + fadd + extract of lane 0.
; The expected output uses the 4s form of faddp with the same register as both
; operands (no separate extract of the high half), then a scalar faddp.
define float @faddp_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: faddp_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-NEXT: faddp v0.4s, v0.4s, v0.4s
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
  %sum = fadd <4 x float> %a, %b
  %lo = shufflevector <4 x float> %sum, <4 x float> poison, <2 x i32> <i32 0, i32 1>
  %hi = shufflevector <4 x float> %sum, <4 x float> poison, <2 x i32> <i32 2, i32 3>
  %pairs = tail call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %lo, <2 x float> %hi)
  %pairs.lane1 = shufflevector <2 x float> %pairs, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
  %total = fadd <2 x float> %pairs, %pairs.lane1
  %res = extractelement <2 x float> %total, i64 0
  ret float %res
}
; Half-precision version of the low/high split pattern: (%a + %b) is split
; into two <4 x half> halves that feed the v4f16 NEON faddp intrinsic, and the
; <4 x half> result is returned directly.
; The expected output uses the 8h form of faddp with the same register as both
; operands.
define <4 x half> @faddp_v8f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: faddp_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: faddp v0.8h, v0.8h, v0.8h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %sum = fadd <8 x half> %a, %b
  %lo = shufflevector <8 x half> %sum, <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %hi = shufflevector <8 x half> %sum, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %pairs = tail call <4 x half> @llvm.aarch64.neon.faddp.v4f16(<4 x half> %lo, <4 x half> %hi)
  ret <4 x half> %pairs
}
; NEON pairwise floating-point add intrinsics called by the tests in this file.
declare <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float>, <2 x float>)
declare <4 x half> @llvm.aarch64.neon.faddp.v4f16(<4 x half>, <4 x half>)

; Constrained fadd intrinsics used by the *_strict tests; the two metadata
; arguments are the rounding mode and the exception behaviour.
declare <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float>, <2 x float>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)

; Attribute group referenced as #0 by the *_strict functions and their calls.
attributes #0 = { strictfp }