v8f16 is a legal type, but promoting it to v8f32 would produce an illegal type. Legalize such operations by combining splitting and promotion, so that each one becomes a pair of v4f16 operations that are then promoted. Also, we were being overly cautious with the various v4f16 nodes: mark more of them as safe to promote to v4f32.
212 lines
7.8 KiB
LLVM
212 lines
7.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=aarch64 -mattr=+fullfp16 < %s | FileCheck %s
; RUN: llc --mtriple=aarch64 < %s | FileCheck %s --check-prefix=CHECKNOFP16

; Scalar pairwise add on <2 x half>: lane 1 of %a is shuffled into lane 0,
; the vectors are added as %a + %shift, and lane 0 of the sum is returned.
; The +fullfp16 assertions expect a single `faddp h0, v0.2h`; the assertions
; for the configuration without fullfp16 expect the add to be carried out in
; f32 (fcvtl, fadd on .4s, fcvtn).
define half @faddp_2xhalf(<2 x half> %a) {
; CHECK-LABEL: faddp_2xhalf:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: faddp h0, v0.2h
; CHECK-NEXT: ret
;
; CHECKNOFP16-LABEL: faddp_2xhalf:
; CHECKNOFP16: // %bb.0: // %entry
; CHECKNOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECKNOFP16-NEXT: dup v1.4h, v0.h[1]
; CHECKNOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECKNOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECKNOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECKNOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECKNOFP16-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT: ret
entry:
  %shift = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 undef>
  %0 = fadd <2 x half> %a, %shift
  %1 = extractelement <2 x half> %0, i32 0
  ret half %1
}

; Same pattern as the non-commuted <2 x half> case, but with the fadd operands
; swapped (%shift + %a). The +fullfp16 assertions still expect a single
; `faddp h0, v0.2h`; without fullfp16 the expected f32 fadd has its source
; operands in the swapped order (v1 first, then v0).
define half @faddp_2xhalf_commute(<2 x half> %a) {
; CHECK-LABEL: faddp_2xhalf_commute:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: faddp h0, v0.2h
; CHECK-NEXT: ret
;
; CHECKNOFP16-LABEL: faddp_2xhalf_commute:
; CHECKNOFP16: // %bb.0: // %entry
; CHECKNOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECKNOFP16-NEXT: dup v1.4h, v0.h[1]
; CHECKNOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECKNOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECKNOFP16-NEXT: fadd v0.4s, v1.4s, v0.4s
; CHECKNOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECKNOFP16-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT: ret
entry:
  %shift = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 undef>
  %0 = fadd <2 x half> %shift, %a
  %1 = extractelement <2 x half> %0, i32 0
  ret half %1
}

; Scalar pairwise add on <4 x half>: lane 1 of %a is shuffled into lane 0
; (the remaining mask elements are undef), the vectors are added as
; %a + %shift, and lane 0 of the sum is returned.
; The +fullfp16 assertions expect a single `faddp h0, v0.2h`; the assertions
; for the configuration without fullfp16 expect the add to be carried out in
; f32 (fcvtl, fadd on .4s, fcvtn).
define half @faddp_4xhalf(<4 x half> %a) {
; CHECK-LABEL: faddp_4xhalf:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: faddp h0, v0.2h
; CHECK-NEXT: ret
;
; CHECKNOFP16-LABEL: faddp_4xhalf:
; CHECKNOFP16: // %bb.0: // %entry
; CHECKNOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECKNOFP16-NEXT: dup v1.4h, v0.h[1]
; CHECKNOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECKNOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECKNOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECKNOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECKNOFP16-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT: ret
entry:
  %shift = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %0 = fadd <4 x half> %a, %shift
  %1 = extractelement <4 x half> %0, i32 0
  ret half %1
}

; Same pattern as the non-commuted <4 x half> case, but with the fadd operands
; swapped (%shift + %a). The +fullfp16 assertions still expect a single
; `faddp h0, v0.2h`; without fullfp16 the expected f32 fadd has its source
; operands in the swapped order (v1 first, then v0).
define half @faddp_4xhalf_commute(<4 x half> %a) {
; CHECK-LABEL: faddp_4xhalf_commute:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: faddp h0, v0.2h
; CHECK-NEXT: ret
;
; CHECKNOFP16-LABEL: faddp_4xhalf_commute:
; CHECKNOFP16: // %bb.0: // %entry
; CHECKNOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECKNOFP16-NEXT: dup v1.4h, v0.h[1]
; CHECKNOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECKNOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECKNOFP16-NEXT: fadd v0.4s, v1.4s, v0.4s
; CHECKNOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECKNOFP16-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT: ret
entry:
  %shift = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %0 = fadd <4 x half> %shift, %a
  %1 = extractelement <4 x half> %0, i32 0
  ret half %1
}

; Scalar pairwise add on <8 x half>: lane 1 of %a is shuffled into lane 0
; (the remaining mask elements are undef), the vectors are added as
; %a + %shift, and lane 0 of the sum is returned.
; The +fullfp16 assertions expect a single `faddp h0, v0.2h`. Without
; fullfp16 the assertions expect the v8f16 add to be handled as two halves:
; each operand is widened with fcvtl/fcvtl2, two fadd instructions operate on
; .4s, and the results are narrowed back with fcvtn/fcvtn2.
define half @faddp_8xhalf(<8 x half> %a) {
; CHECK-LABEL: faddp_8xhalf:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: faddp h0, v0.2h
; CHECK-NEXT: ret
;
; CHECKNOFP16-LABEL: faddp_8xhalf:
; CHECKNOFP16: // %bb.0: // %entry
; CHECKNOFP16-NEXT: dup v1.8h, v0.h[1]
; CHECKNOFP16-NEXT: fcvtl v2.4s, v0.4h
; CHECKNOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECKNOFP16-NEXT: fcvtl v3.4s, v1.4h
; CHECKNOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECKNOFP16-NEXT: fadd v2.4s, v2.4s, v3.4s
; CHECKNOFP16-NEXT: fadd v1.4s, v0.4s, v1.4s
; CHECKNOFP16-NEXT: fcvtn v0.4h, v2.4s
; CHECKNOFP16-NEXT: fcvtn2 v0.8h, v1.4s
; CHECKNOFP16-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT: ret
entry:
  %shift = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %0 = fadd <8 x half> %a, %shift
  %1 = extractelement <8 x half> %0, i32 0
  ret half %1
}

; Same pattern as the non-commuted <8 x half> case, but with the fadd operands
; swapped (%shift + %a). The +fullfp16 assertions still expect a single
; `faddp h0, v0.2h`; without fullfp16 the two expected .4s fadd instructions
; have their source operands in the swapped order.
define half @faddp_8xhalf_commute(<8 x half> %a) {
; CHECK-LABEL: faddp_8xhalf_commute:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: faddp h0, v0.2h
; CHECK-NEXT: ret
;
; CHECKNOFP16-LABEL: faddp_8xhalf_commute:
; CHECKNOFP16: // %bb.0: // %entry
; CHECKNOFP16-NEXT: dup v1.8h, v0.h[1]
; CHECKNOFP16-NEXT: fcvtl v2.4s, v0.4h
; CHECKNOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECKNOFP16-NEXT: fcvtl v3.4s, v1.4h
; CHECKNOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECKNOFP16-NEXT: fadd v2.4s, v3.4s, v2.4s
; CHECKNOFP16-NEXT: fadd v1.4s, v1.4s, v0.4s
; CHECKNOFP16-NEXT: fcvtn v0.4h, v2.4s
; CHECKNOFP16-NEXT: fcvtn2 v0.8h, v1.4s
; CHECKNOFP16-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT: ret
entry:
  %shift = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %0 = fadd <8 x half> %shift, %a
  %1 = extractelement <8 x half> %0, i32 0
  ret half %1
}

; Full-vector pairwise add on <8 x half>: %s swaps each adjacent pair of lanes
; of %a (mask 1,0,3,2,5,4,7,6) and the result is `fadd reassoc %s, %a`, so
; both lanes of every pair hold that pair's sum.
; The +fullfp16 assertions expect rev32 followed by a single .8h fadd. Without
; fullfp16 the assertions expect rev32 and then the add split into two halves
; that are widened (fcvtl/fcvtl2), added on .4s, and narrowed back
; (fcvtn/fcvtn2).
define <8 x half> @addp_v8f16(<8 x half> %a) {
; CHECK-LABEL: addp_v8f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev32 v1.8h, v0.8h
; CHECK-NEXT: fadd v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
;
; CHECKNOFP16-LABEL: addp_v8f16:
; CHECKNOFP16: // %bb.0: // %entry
; CHECKNOFP16-NEXT: rev32 v1.8h, v0.8h
; CHECKNOFP16-NEXT: fcvtl v2.4s, v0.4h
; CHECKNOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECKNOFP16-NEXT: fcvtl v3.4s, v1.4h
; CHECKNOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECKNOFP16-NEXT: fadd v2.4s, v3.4s, v2.4s
; CHECKNOFP16-NEXT: fadd v1.4s, v1.4s, v0.4s
; CHECKNOFP16-NEXT: fcvtn v0.4h, v2.4s
; CHECKNOFP16-NEXT: fcvtn2 v0.8h, v1.4s
; CHECKNOFP16-NEXT: ret
entry:
  %s = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %b = fadd reassoc <8 x half> %s, %a
  ret <8 x half> %b
}

; Full-vector pairwise add on <16 x half>: %s swaps each adjacent pair of
; lanes of %a and the result is `fadd reassoc %s, %a`, so both lanes of every
; pair hold that pair's sum.
; The +fullfp16 assertions expect one vector faddp across the two input
; registers followed by zip1/zip2 of the result with itself. Without fullfp16
; the assertions expect rev32 on each input register and then each v8f16 add
; split into two halves that are widened (fcvtl/fcvtl2), added on .4s, and
; narrowed back (fcvtn/fcvtn2).
define <16 x half> @addp_v16f16(<16 x half> %a) {
; CHECK-LABEL: addp_v16f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: faddp v1.8h, v0.8h, v1.8h
; CHECK-NEXT: zip1 v0.8h, v1.8h, v1.8h
; CHECK-NEXT: zip2 v1.8h, v1.8h, v1.8h
; CHECK-NEXT: ret
;
; CHECKNOFP16-LABEL: addp_v16f16:
; CHECKNOFP16: // %bb.0: // %entry
; CHECKNOFP16-NEXT: rev32 v2.8h, v0.8h
; CHECKNOFP16-NEXT: rev32 v3.8h, v1.8h
; CHECKNOFP16-NEXT: fcvtl v4.4s, v0.4h
; CHECKNOFP16-NEXT: fcvtl v6.4s, v1.4h
; CHECKNOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECKNOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECKNOFP16-NEXT: fcvtl v5.4s, v2.4h
; CHECKNOFP16-NEXT: fcvtl v7.4s, v3.4h
; CHECKNOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECKNOFP16-NEXT: fcvtl2 v3.4s, v3.8h
; CHECKNOFP16-NEXT: fadd v4.4s, v5.4s, v4.4s
; CHECKNOFP16-NEXT: fadd v5.4s, v7.4s, v6.4s
; CHECKNOFP16-NEXT: fadd v2.4s, v2.4s, v0.4s
; CHECKNOFP16-NEXT: fadd v3.4s, v3.4s, v1.4s
; CHECKNOFP16-NEXT: fcvtn v0.4h, v4.4s
; CHECKNOFP16-NEXT: fcvtn v1.4h, v5.4s
; CHECKNOFP16-NEXT: fcvtn2 v0.8h, v2.4s
; CHECKNOFP16-NEXT: fcvtn2 v1.8h, v3.4s
; CHECKNOFP16-NEXT: ret
entry:
  %s = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %b = fadd reassoc <16 x half> %s, %a
  ret <16 x half> %b
}