This is a follow-up to #105455 which updates the VPIntrinsic mappings for the fadd and fmul cases, and supports both ordered and unordered reductions. This allows the use of a single wider operation with a restricted EVL instead of padding the vector with the neutral element. This has all the same tradeoffs as the previous patch.
1041 lines
37 KiB
LLVM
1041 lines
37 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s

declare half @llvm.vector.reduce.fadd.nxv1f16(half, <vscale x 1 x half>)

; Unordered (reassoc) fadd reduction, nxv1f16 -> vfredusum.
define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
  ret half %red
}

; Ordered fadd reduction, nxv1f16 -> vfredosum.
define half @vreduce_ord_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fadd.nxv2f16(half, <vscale x 2 x half>)

define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fadd.nxv4f16(half, <vscale x 4 x half>)

define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
  ret half %red
}

define half @vreduce_ord_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
  ret half %red
}

declare float @llvm.vector.reduce.fadd.nxv1f32(float, <vscale x 1 x float>)

define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
  ret float %red
}

; Widening (fpext half -> float) unordered reduction uses vfwredusum.
define float @vreduce_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
  %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
  ret float %red
}

define float @vreduce_ord_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
  %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
  ret float %red
}

declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>)

define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
  ret float %red
}

define float @vreduce_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
  %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
  ret float %red
}

define float @vreduce_ord_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
  %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
  ret float %red
}

declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)

define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
  ret float %red
}

define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
  ret float %red
}

define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
  %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
  ret float %red
}

define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
  %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
  ret float %red
}

declare double @llvm.vector.reduce.fadd.nxv1f64(double, <vscale x 1 x double>)

define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
  ret double %red
}

define double @vreduce_ord_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
  ret double %red
}

; Widening (fpext float -> double) unordered reduction uses vfwredusum.
define double @vreduce_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
  %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
  ret double %red
}

define double @vreduce_ord_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
  %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
  ret double %red
}

declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)

define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
  ret double %red
}

define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
  ret double %red
}

define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
  %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
  ret double %red
}

define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
  %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
  ret double %red
}

declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)

define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v12
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
  ret double %red
}

define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfmv.s.f v12, fa0
; CHECK-NEXT:    vfredosum.vs v8, v8, v12
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
  ret double %red
}

define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfwredusum.vs v8, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
  %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
  ret double %red
}

define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfwredosum.vs v8, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
  %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
  ret double %red
}

declare half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half>)

; fmin reductions lower to vfredmin regardless of fast-math flags.
define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
; CHECK-LABEL: vreduce_fmin_nxv1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
  ret half %red
}

define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) #0 {
; CHECK-LABEL: vreduce_fmin_nxv1f16_nonans:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
  ret half %red
}

define half @vreduce_fmin_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
; CHECK-LABEL: vreduce_fmin_nxv1f16_nonans_noinfs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half>)

define half @vreduce_fmin_nxv2f16(<vscale x 2 x half> %v) {
; CHECK-LABEL: vreduce_fmin_nxv2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half>)

define half @vreduce_fmin_nxv4f16(<vscale x 4 x half> %v) {
; CHECK-LABEL: vreduce_fmin_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half>)

; Larger than m8: split in two and combine with vfmin.vv first.
define half @vreduce_fmin_nxv64f16(<vscale x 64 x half> %v) {
; CHECK-LABEL: vreduce_fmin_nxv64f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-NEXT:    vfmin.vv v8, v8, v16
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
  ret half %red
}

declare float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float>)

define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
; CHECK-LABEL: vreduce_fmin_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
  ret float %red
}

define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
; CHECK-LABEL: vreduce_fmin_nxv1f32_nonans:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
  ret float %red
}

define float @vreduce_fmin_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
; CHECK-LABEL: vreduce_fmin_nxv1f32_nonans_noinfs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
  ret float %red
}

declare float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float>)

define float @vreduce_fmin_nxv2f32(<vscale x 2 x float> %v) {
; CHECK-LABEL: vreduce_fmin_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
  ret float %red
}

declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)

define float @vreduce_fmin_nxv4f32(<vscale x 4 x float> %v) {
; CHECK-LABEL: vreduce_fmin_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
  ret float %red
}

declare float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float>)

; Larger than m8: split in two and combine with vfmin.vv first.
define float @vreduce_fmin_nxv32f32(<vscale x 32 x float> %v) {
; CHECK-LABEL: vreduce_fmin_nxv32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmin.vv v8, v8, v16
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
  ret float %red
}

declare double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double>)

define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
; CHECK-LABEL: vreduce_fmin_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
  ret double %red
}

define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
; CHECK-LABEL: vreduce_fmin_nxv1f64_nonans:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
  ret double %red
}

define double @vreduce_fmin_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
; CHECK-LABEL: vreduce_fmin_nxv1f64_nonans_noinfs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
  ret double %red
}

declare double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double>)

define double @vreduce_fmin_nxv2f64(<vscale x 2 x double> %v) {
; CHECK-LABEL: vreduce_fmin_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
  ret double %red
}

declare double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double>)

define double @vreduce_fmin_nxv4f64(<vscale x 4 x double> %v) {
; CHECK-LABEL: vreduce_fmin_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
  ret double %red
}

declare double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double>)

; Larger than m8: split in two and combine with vfmin.vv first.
define double @vreduce_fmin_nxv16f64(<vscale x 16 x double> %v) {
; CHECK-LABEL: vreduce_fmin_nxv16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfmin.vv v8, v8, v16
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
  ret double %red
}

declare half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half>)

; fmax reductions lower to vfredmax regardless of fast-math flags.
define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
; CHECK-LABEL: vreduce_fmax_nxv1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
  ret half %red
}

define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) #0 {
; CHECK-LABEL: vreduce_fmax_nxv1f16_nonans:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
  ret half %red
}

define half @vreduce_fmax_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
; CHECK-LABEL: vreduce_fmax_nxv1f16_nonans_noinfs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half>)

define half @vreduce_fmax_nxv2f16(<vscale x 2 x half> %v) {
; CHECK-LABEL: vreduce_fmax_nxv2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half>)

define half @vreduce_fmax_nxv4f16(<vscale x 4 x half> %v) {
; CHECK-LABEL: vreduce_fmax_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
  ret half %red
}

declare half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half>)

; Larger than m8: split in two and combine with vfmax.vv first.
define half @vreduce_fmax_nxv64f16(<vscale x 64 x half> %v) {
; CHECK-LABEL: vreduce_fmax_nxv64f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-NEXT:    vfmax.vv v8, v8, v16
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
  ret half %red
}

declare float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float>)

define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
; CHECK-LABEL: vreduce_fmax_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
  ret float %red
}

define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
; CHECK-LABEL: vreduce_fmax_nxv1f32_nonans:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
  ret float %red
}

define float @vreduce_fmax_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
; CHECK-LABEL: vreduce_fmax_nxv1f32_nonans_noinfs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
  ret float %red
}

declare float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float>)

define float @vreduce_fmax_nxv2f32(<vscale x 2 x float> %v) {
; CHECK-LABEL: vreduce_fmax_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
  ret float %red
}

declare float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float>)

define float @vreduce_fmax_nxv4f32(<vscale x 4 x float> %v) {
; CHECK-LABEL: vreduce_fmax_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
  ret float %red
}

declare float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float>)

; Larger than m8: split in two and combine with vfmax.vv first.
define float @vreduce_fmax_nxv32f32(<vscale x 32 x float> %v) {
; CHECK-LABEL: vreduce_fmax_nxv32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmax.vv v8, v8, v16
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
  ret float %red
}

declare double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double>)
|
|
|
|
define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
|
|
; CHECK-LABEL: vreduce_fmax_nxv1f64:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
|
|
; CHECK-NEXT: vfredmax.vs v8, v8, v8
|
|
; CHECK-NEXT: vfmv.f.s fa0, v8
|
|
; CHECK-NEXT: ret
|
|
%red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
|
|
ret double %red
|
|
}
|
|
|
|
define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
|
|
; CHECK-LABEL: vreduce_fmax_nxv1f64_nonans:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
|
|
; CHECK-NEXT: vfredmax.vs v8, v8, v8
|
|
; CHECK-NEXT: vfmv.f.s fa0, v8
|
|
; CHECK-NEXT: ret
|
|
%red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
|
|
ret double %red
|
|
}
|
|
|
|
; Same reduction with both nnan and ninf flags; again the expected codegen
; matches the flag-free nxv1f64 case.
define double @vreduce_fmax_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
; CHECK-LABEL: vreduce_fmax_nxv1f64_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
ret double %red
}

; fmax over an m2 group of doubles: single vfredmax.vs, no pre-combining.
declare double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double>)

define double @vreduce_fmax_nxv2f64(<vscale x 2 x double> %v) {
; CHECK-LABEL: vreduce_fmax_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
ret double %red
}

; fmax over an m4 group of doubles: single vfredmax.vs, no pre-combining.
declare double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double>)

define double @vreduce_fmax_nxv4f64(<vscale x 4 x double> %v) {
; CHECK-LABEL: vreduce_fmax_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
ret double %red
}

; fmax over <vscale x 16 x double> spans two m8 register groups (v8 and v16):
; combine halves with vfmax.vv, then one vfredmax.vs.
declare double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double>)

define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
; CHECK-LABEL: vreduce_fmax_nxv16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: vfredmax.vs v8, v8, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
ret double %red
}

; reassoc+nsz fadd with a live start value %s: the start is moved into the
; scalar element of v9 and an unordered vfredusum.vs is used.
define float @vreduce_nsz_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_nsz_fadd_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
ret float %red
}

; Test Widen VECREDUCE_SEQ_FADD
; Ordered fadd over the non-power-of-2 type nxv3f16: the EVL is computed from
; vlenb as 3 * (vlenb/8) (srli+slli+add below), so the widened vfredosum.vs
; only reduces the live elements instead of padding the vector.
declare half @llvm.vector.reduce.fadd.nxv3f16(half, <vscale x 3 x half>)

define half @vreduce_ord_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv3f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: slli a1, a0, 1
; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vfredosum.vs v9, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v9
; CHECK-NEXT: ret
%red = call half @llvm.vector.reduce.fadd.nxv3f16(half %s, <vscale x 3 x half> %v)
ret half %red
}

; Ordered fadd over nxv6f16: EVL = vlenb - 2*(vlenb/8) = 6 * (vlenb/8),
; reducing only the live elements of the m2-widened vector.
declare half @llvm.vector.reduce.fadd.nxv6f16(half, <vscale x 6 x half>)

define half @vreduce_ord_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv6f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a1, a0, 3
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfredosum.vs v10, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
%red = call half @llvm.vector.reduce.fadd.nxv6f16(half %s, <vscale x 6 x half> %v)
ret half %red
}

; Ordered fadd over nxv10f16: EVL = 10 * (vlenb/8) (li 10 + mul below),
; reducing only the live elements of the m4-widened vector.
declare half @llvm.vector.reduce.fadd.nxv10f16(half, <vscale x 10 x half>)

define half @vreduce_ord_fadd_nxv10f16(<vscale x 10 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv10f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: li a1, 10
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfredosum.vs v12, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v12
; CHECK-NEXT: ret
%red = call half @llvm.vector.reduce.fadd.nxv10f16(half %s, <vscale x 10 x half> %v)
ret half %red
}

; Ordered fadd over nxv12f16: EVL = 16*(vlenb/8) - 4*(vlenb/8) = 12*(vlenb/8),
; reducing only the live elements of the m4-widened vector.
declare half @llvm.vector.reduce.fadd.nxv12f16(half, <vscale x 12 x half>)

define half @vreduce_ord_fadd_nxv12f16(<vscale x 12 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv12f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: slli a1, a0, 2
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfredosum.vs v12, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v12
; CHECK-NEXT: ret
%red = call half @llvm.vector.reduce.fadd.nxv12f16(half %s, <vscale x 12 x half> %v)
ret half %red
}

; Test Widen vector reduce type (fadd/fmin/fmax)
; Unordered fadd over nxv3f16 with start value %s: EVL = 3 * (vlenb/8); the
; lui of 1048568 materializes the 16-bit pattern 0x8000 (half -0.0) into v10.
define half @vreduce_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv3f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: slli a1, a0, 1
; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: lui a1, 1048568
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vfredusum.vs v10, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
%red = call reassoc half @llvm.vector.reduce.fadd.nxv3f16(half %s, <vscale x 3 x half> %v)
ret half %red
}

; Unordered fadd over nxv6f16 with start value %s: EVL = 6 * (vlenb/8); as in
; the nxv3f16 case, 0x8000 (half -0.0) is materialized via lui 1048568.
define half @vreduce_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv6f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a1, a0, 3
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: lui a1, 1048568
; CHECK-NEXT: vmv.s.x v11, a1
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfredusum.vs v11, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v11
; CHECK-NEXT: ret
%red = call reassoc half @llvm.vector.reduce.fadd.nxv6f16(half %s, <vscale x 6 x half> %v)
ret half %red
}

; fmin over the widened nxv10f16 type: the scalar start element is loaded from
; the constant pool (.LCPI73_0) and the EVL is 10 * (vlenb/8).
declare half @llvm.vector.reduce.fmin.nxv10f16(<vscale x 10 x half>)

define half @vreduce_fmin_nxv10f16(<vscale x 10 x half> %v) {
; CHECK-LABEL: vreduce_fmin_nxv10f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: lui a1, %hi(.LCPI73_0)
; CHECK-NEXT: addi a1, a1, %lo(.LCPI73_0)
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v12, (a1)
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: li a1, 10
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfredmin.vs v12, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v12
; CHECK-NEXT: ret
%red = call half @llvm.vector.reduce.fmin.nxv10f16(<vscale x 10 x half> %v)
ret half %red
}

; fmax over the widened nxv12f16 type: EVL = 12 * (vlenb/8); the scalar start
; element is the 16-bit pattern 0xFE00 materialized with li -512.
declare half @llvm.vector.reduce.fmax.nxv12f16(<vscale x 12 x half>)

define half @vreduce_fmax_nxv12f16(<vscale x 12 x half> %v) {
; CHECK-LABEL: vreduce_fmax_nxv12f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: slli a1, a0, 2
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: li a1, -512
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v12, a1
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfredmax.vs v12, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v12
; CHECK-NEXT: ret
%red = call half @llvm.vector.reduce.fmax.nxv12f16(<vscale x 12 x half> %v)
ret half %red
}
