[Intrinsic] Introduce reduction intrinsics for minimum/maximum

This patch introduces reduction intrinsics for floating-point minimum
and maximum, which have the same semantics (for NaN and signed zero) as
llvm.minimum and llvm.maximum.

Reviewed-By: nikic

Differential Revision: https://reviews.llvm.org/D152370
This commit is contained in:
Anna Thomas
2023-06-09 17:09:12 -04:00
parent 22f5dc7501
commit 26bfbec5d2
16 changed files with 2154 additions and 2 deletions

View File

@@ -17846,6 +17846,64 @@ Arguments:
""""""""""
The argument to this intrinsic must be a vector of floating-point values.
.. _int_vector_reduce_fmaximum:
'``llvm.vector.reduce.fmaximum.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
This is an overloaded intrinsic.
::
declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %a)
declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %a)
Overview:
"""""""""
The '``llvm.vector.reduce.fmaximum.*``' intrinsics do a floating-point
``MAX`` reduction of a vector, returning the result as a scalar. The return type
matches the element-type of the vector input.
This instruction has the same comparison semantics as the '``llvm.maximum.*``'
intrinsic. That is, this intrinsic propagates NaNs and +0.0 is considered
greater than -0.0. If any element of the vector is a NaN, the result is NaN.
Arguments:
""""""""""
The argument to this intrinsic must be a vector of floating-point values.
.. _int_vector_reduce_fminimum:
'``llvm.vector.reduce.fminimum.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
This is an overloaded intrinsic.
::
declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %a)
declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %a)
Overview:
"""""""""
The '``llvm.vector.reduce.fminimum.*``' intrinsics do a floating-point
``MIN`` reduction of a vector, returning the result as a scalar. The return type
matches the element-type of the vector input.
This instruction has the same comparison semantics as the '``llvm.minimum.*``'
intrinsic. That is, this intrinsic propagates NaNs and -0.0 is considered less
than +0.0. If any element of the vector is a NaN, the result is NaN.
Arguments:
""""""""""
The argument to this intrinsic must be a vector of floating-point values.
'``llvm.vector.insert``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@@ -1318,6 +1318,10 @@ enum NodeType {
/// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
VECREDUCE_FMAX,
VECREDUCE_FMIN,
/// FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the
/// llvm.minimum and llvm.maximum semantics.
VECREDUCE_FMAXIMUM,
VECREDUCE_FMINIMUM,
/// Integer reductions may have a result type larger than the vector element
/// type. However, the reduction is performed using the vector element type
/// and the value in the top bits is unspecified.

View File

@@ -756,6 +756,16 @@ public:
/// vector.
CallInst *CreateFPMinReduce(Value *Src);
/// Create a vector float maximum reduction intrinsic of the source
/// vector. This variant follows the NaN and signed zero semantics of
/// the llvm.maximum intrinsic.
CallInst *CreateFPMaximumReduce(Value *Src);
/// Create a vector float minimum reduction intrinsic of the source
/// vector. This variant follows the NaN and signed zero semantics of
/// the llvm.minimum intrinsic.
CallInst *CreateFPMinimumReduce(Value *Src);
/// Create a lifetime.start intrinsic.
///
/// If the pointer isn't i8* it will be converted.

View File

@@ -2323,6 +2323,10 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
[llvm_anyvector_ty]>;
def int_vector_reduce_fmin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty]>;
// Vector reductions that follow llvm.minimum / llvm.maximum semantics
// (NaN-propagating, -0.0 ordered below +0.0). Each takes one overloaded
// vector operand; the result type is LLVMVectorElementType<0>.
def int_vector_reduce_fminimum
    : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyvector_ty]>;
def int_vector_reduce_fmaximum
    : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyvector_ty]>;
}
//===----- Matrix intrinsics ---------------------------------------------===//

View File

@@ -2015,7 +2015,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
#include "llvm/IR/VPIntrinsics.def"
return visitVPOp(N);

View File

@@ -1205,6 +1205,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
case ISD::IS_FPCLASS:
Action = TLI.getOperationAction(
Node->getOpcode(), Node->getOperand(0).getValueType());
@@ -4002,6 +4004,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
Results.push_back(TLI.expandVecReduce(Node, DAG));
break;
case ISD::GLOBAL_OFFSET_TABLE:

View File

@@ -145,6 +145,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
R = SoftenFloatRes_VECREDUCE(N);
break;
case ISD::VECREDUCE_SEQ_FADD:
@@ -2339,6 +2341,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
R = PromoteFloatRes_VECREDUCE(N);
break;
case ISD::VECREDUCE_SEQ_FADD:
@@ -2704,6 +2708,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
R = SoftPromoteHalfRes_VECREDUCE(N);
break;
case ISD::VECREDUCE_SEQ_FADD:

View File

@@ -451,6 +451,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
@@ -960,6 +962,8 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
Results.push_back(TLI.expandVecReduce(Node, DAG));
return;
case ISD::VECREDUCE_SEQ_FADD:

View File

@@ -696,6 +696,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
Res = ScalarizeVecOp_VECREDUCE(N);
break;
case ISD::VECREDUCE_SEQ_FADD:
@@ -2924,6 +2926,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
Res = SplitVecOp_VECREDUCE(N, OpNo);
break;
case ISD::VECREDUCE_SEQ_FADD:
@@ -5921,6 +5925,8 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
Res = WidenVecOp_VECREDUCE(N);
break;
case ISD::VECREDUCE_SEQ_FADD:

View File

@@ -455,6 +455,10 @@ ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
case ISD::VECREDUCE_FMIN:
case ISD::VP_REDUCE_FMIN:
return ISD::FMINNUM;
case ISD::VECREDUCE_FMAXIMUM:
return ISD::FMAXIMUM;
case ISD::VECREDUCE_FMINIMUM:
return ISD::FMINIMUM;
}
}
@@ -12393,6 +12397,18 @@ SDValue SelectionDAG::getNeutralElement(unsigned Opcode, const SDLoc &DL,
return getConstantFP(NeutralAF, DL, VT);
}
case ISD::FMINIMUM:
case ISD::FMAXIMUM: {
// Neutral element for fminimum is +Inf, or FLT_MAX when the ninf flag is
// set; for fmaximum it is the negation of that value.
const fltSemantics &Semantics = EVTToAPFloatSemantics(VT);
APFloat NeutralAF = !Flags.hasNoInfs() ? APFloat::getInf(Semantics)
: APFloat::getLargest(Semantics);
if (Opcode == ISD::FMAXIMUM)
NeutralAF.changeSign();
return getConstantFP(NeutralAF, DL, VT);
}
}
}

View File

@@ -33,6 +33,7 @@
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -7289,6 +7290,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
case Intrinsic::vector_reduce_fmaximum:
case Intrinsic::vector_reduce_fminimum:
visitVectorReduce(I, Intrinsic);
return;
@@ -10010,6 +10013,12 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
case Intrinsic::vector_reduce_fmin:
Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
break;
case Intrinsic::vector_reduce_fmaximum:
Res = DAG.getNode(ISD::VECREDUCE_FMAXIMUM, dl, VT, Op1, SDFlags);
break;
case Intrinsic::vector_reduce_fminimum:
Res = DAG.getNode(ISD::VECREDUCE_FMINIMUM, dl, VT, Op1, SDFlags);
break;
default:
llvm_unreachable("Unhandled vector reduce intrinsic");
}

View File

@@ -500,6 +500,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::VECREDUCE_UMIN: return "vecreduce_umin";
case ISD::VECREDUCE_FMAX: return "vecreduce_fmax";
case ISD::VECREDUCE_FMIN: return "vecreduce_fmin";
case ISD::VECREDUCE_FMAXIMUM: return "vecreduce_fmaximum";
case ISD::VECREDUCE_FMINIMUM: return "vecreduce_fminimum";
case ISD::STACKMAP:
return "stackmap";
case ISD::PATCHPOINT:

View File

@@ -882,7 +882,8 @@ void TargetLoweringBase::initActions() {
ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_FMAX,
ISD::VECREDUCE_FMIN, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL},
ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAXIMUM, ISD::VECREDUCE_FMINIMUM,
ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL},
VT, Expand);
// Named vector shuffles default to expand.

View File

@@ -482,6 +482,14 @@ CallInst *IRBuilderBase::CreateFPMinReduce(Value *Src) {
return getReductionIntrinsic(Intrinsic::vector_reduce_fmin, Src);
}
// Build a call to the llvm.vector.reduce.fmaximum intrinsic on Src by
// delegating to the shared reduction helper.
CallInst *IRBuilderBase::CreateFPMaximumReduce(Value *Src) {
  const auto ReduceID = Intrinsic::vector_reduce_fmaximum;
  return getReductionIntrinsic(ReduceID, Src);
}
// Build a call to the llvm.vector.reduce.fminimum intrinsic on Src by
// delegating to the shared reduction helper.
CallInst *IRBuilderBase::CreateFPMinimumReduce(Value *Src) {
  const auto ReduceID = Intrinsic::vector_reduce_fminimum;
  return getReductionIntrinsic(ReduceID, Src);
}
CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) {
assert(isa<PointerType>(Ptr->getType()) &&
"lifetime.start only applies to pointers.");

View File

@@ -0,0 +1,224 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
declare half @llvm.vector.reduce.fmaximum.v1f16(<1 x half> %a)
declare float @llvm.vector.reduce.fmaximum.v1f32(<1 x float> %a)
declare double @llvm.vector.reduce.fmaximum.v1f64(<1 x double> %a)
declare fp128 @llvm.vector.reduce.fmaximum.v1f128(<1 x fp128> %a)
declare half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> %a)
declare half @llvm.vector.reduce.fmaximum.v11f16(<11 x half> %a)
declare float @llvm.vector.reduce.fmaximum.v3f32(<3 x float> %a)
declare fp128 @llvm.vector.reduce.fmaximum.v2f128(<2 x fp128> %a)
declare float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> %a)
; Single-element half vector: the expected code contains no fmax, only `ret`.
define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-LABEL: test_v1f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call half @llvm.vector.reduce.fmaximum.v1f16(<1 x half> %a)
ret half %b
}
; Single-element float vector: the expected code contains only register
; kill annotations and `ret`; no fmax is emitted.
define float @test_v1f32(<1 x float> %a) nounwind {
; CHECK-LABEL: test_v1f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: ret
%b = call float @llvm.vector.reduce.fmaximum.v1f32(<1 x float> %a)
ret float %b
}
; Single-element double vector: the expected code contains no fmax, only `ret`.
define double @test_v1f64(<1 x double> %a) nounwind {
; CHECK-LABEL: test_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call double @llvm.vector.reduce.fmaximum.v1f64(<1 x double> %a)
ret double %b
}
; Single-element fp128 vector: the expected code contains no comparison or
; call, only `ret`.
define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
; CHECK-LABEL: test_v1f128:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call fp128 @llvm.vector.reduce.fmaximum.v1f128(<1 x fp128> %a)
ret fp128 %b
}
; Four half elements. In the CHECK-NOFP run (no +fullfp16) every step converts
; the operands to single precision, applies fmax, and converts the result back
; to half. In the CHECK-FP run (+fullfp16) fmax operates directly on h
; registers.
define half @test_v4f16(<4 x half> %a) nounwind {
; CHECK-NOFP-LABEL: test_v4f16:
; CHECK-NOFP: // %bb.0:
; CHECK-NOFP-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NOFP-NEXT: mov h1, v0.h[1]
; CHECK-NOFP-NEXT: fcvt s2, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmax s1, s2, s1
; CHECK-NOFP-NEXT: mov h2, v0.h[2]
; CHECK-NOFP-NEXT: mov h0, v0.h[3]
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt s2, h2
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmax s1, s1, s2
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmax s0, s1, s0
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: ret
;
; CHECK-FP-LABEL: test_v4f16:
; CHECK-FP: // %bb.0:
; CHECK-FP-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-FP-NEXT: mov h1, v0.h[1]
; CHECK-FP-NEXT: mov h2, v0.h[2]
; CHECK-FP-NEXT: fmax h1, h0, h1
; CHECK-FP-NEXT: mov h0, v0.h[3]
; CHECK-FP-NEXT: fmax h1, h1, h2
; CHECK-FP-NEXT: fmax h0, h1, h0
; CHECK-FP-NEXT: ret
%b = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> %a)
ret half %b
}
; Eleven half elements (not a power of two); eight arrive in h0-h7 and three
; are loaded from the stack. In the CHECK-NOFP run the reduction is done with
; scalar fmax on single-precision values, converting to and from half around
; each step. In the CHECK-FP run the stack elements are inserted into a vector
; pre-filled by `movi v1.8h, #252, lsl #8` (0xfc00 per lane, the half encoding
; of negative infinity), so the unused lanes do not affect the maximum.
define half @test_v11f16(<11 x half> %a) nounwind {
; CHECK-NOFP-LABEL: test_v11f16:
; CHECK-NOFP: // %bb.0:
; CHECK-NOFP-NEXT: ldr h16, [sp, #8]
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: ldr h17, [sp]
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s2, h2
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcvt s17, h17
; CHECK-NOFP-NEXT: fmax s1, s1, s16
; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
; CHECK-NOFP-NEXT: fmax s0, s0, s17
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmax s0, s0, s1
; CHECK-NOFP-NEXT: fmax s1, s2, s16
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmax s0, s0, s1
; CHECK-NOFP-NEXT: fcvt s1, h3
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmax s0, s0, s1
; CHECK-NOFP-NEXT: fcvt s1, h4
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmax s0, s0, s1
; CHECK-NOFP-NEXT: fcvt s1, h5
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmax s0, s0, s1
; CHECK-NOFP-NEXT: fcvt s1, h6
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmax s0, s0, s1
; CHECK-NOFP-NEXT: fcvt s1, h7
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmax s0, s0, s1
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: ret
;
; CHECK-FP-LABEL: test_v11f16:
; CHECK-FP: // %bb.0:
; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0
; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1
; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2
; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3
; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4
; CHECK-FP-NEXT: mov x8, sp
; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5
; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6
; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7
; CHECK-FP-NEXT: mov v0.h[1], v1.h[0]
; CHECK-FP-NEXT: movi v1.8h, #252, lsl #8
; CHECK-FP-NEXT: mov v0.h[2], v2.h[0]
; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8]
; CHECK-FP-NEXT: add x8, sp, #8
; CHECK-FP-NEXT: mov v0.h[3], v3.h[0]
; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8]
; CHECK-FP-NEXT: add x8, sp, #16
; CHECK-FP-NEXT: mov v0.h[4], v4.h[0]
; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8]
; CHECK-FP-NEXT: mov v0.h[5], v5.h[0]
; CHECK-FP-NEXT: mov v0.h[6], v6.h[0]
; CHECK-FP-NEXT: mov v0.h[7], v7.h[0]
; CHECK-FP-NEXT: fmax v0.8h, v0.8h, v1.8h
; CHECK-FP-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-FP-NEXT: fmax v0.4h, v0.4h, v1.4h
; CHECK-FP-NEXT: mov h1, v0.h[1]
; CHECK-FP-NEXT: mov h2, v0.h[2]
; CHECK-FP-NEXT: fmax h1, h0, h1
; CHECK-FP-NEXT: mov h0, v0.h[3]
; CHECK-FP-NEXT: fmax h1, h1, h2
; CHECK-FP-NEXT: fmax h0, h1, h0
; CHECK-FP-NEXT: ret
%b = call half @llvm.vector.reduce.fmaximum.v11f16(<11 x half> %a)
ret half %b
}
; The <3 x float> input is widened to four lanes. Lane 3 is padded with the
; neutral element for fmaximum, negative infinity (0xff800000), so the padding
; does not affect the result.
define float @test_v3f32(<3 x float> %a) nounwind {
; CHECK-LABEL: test_v3f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-8388608 // =0xff800000
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: fmax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: fmax s0, s0, s1
; CHECK-NEXT: ret
%b = call float @llvm.vector.reduce.fmaximum.v3f32(<3 x float> %a)
ret float %b
}
; Same reduction with the `ninf` flag on the call. The padding element for
; lane 3 of the widened vector is then the most negative finite float
; (0xff7fffff, i.e. -FLT_MAX) instead of negative infinity.
define float @test_v3f32_ninf(<3 x float> %a) nounwind {
; CHECK-LABEL: test_v3f32_ninf:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-8388609 // =0xff7fffff
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: fmax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: fmax s0, s0, s1
; CHECK-NEXT: ret
%b = call ninf float @llvm.vector.reduce.fmaximum.v3f32(<3 x float> %a)
ret float %b
}
; Cannot legalize f128. See PR63267 - The underlying fmaximum has no default
; expansion and no libcalls.
;define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; %b = call fp128 @llvm.vector.reduce.fmaximum.v2f128(<2 x fp128> %a)
; ret fp128 %b
;}
; Sixteen floats arrive in four 4-lane registers (v0-v3). The expected code
; first combines the registers pairwise with vector fmax, then folds the
; upper half onto the lower half (ext + 2-lane fmax), and finishes with a
; scalar fmax of the two remaining lanes.
define float @test_v16f32(<16 x float> %a) nounwind {
; CHECK-LABEL: test_v16f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmax v1.4s, v1.4s, v3.4s
; CHECK-NEXT: fmax v0.4s, v0.4s, v2.4s
; CHECK-NEXT: fmax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: fmax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: fmax s0, s0, s1
; CHECK-NEXT: ret
%b = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> %a)
ret float %b
}

File diff suppressed because it is too large Load Diff