[Intrinsic] Introduce reduction intrinsics for minimum/maximum
This patch introduces reduction intrinsics for floating-point minimum and maximum, which have the same semantics (for NaN and signed zero) as llvm.minimum and llvm.maximum. Reviewed-By: nikic Differential Revision: https://reviews.llvm.org/D152370
This commit is contained in:
@@ -17846,6 +17846,64 @@ Arguments:
|
||||
""""""""""
|
||||
The argument to this intrinsic must be a vector of floating-point values.
|
||||
|
||||
.. _int_vector_reduce_fmaximum:
|
||||
|
||||
'``llvm.vector.reduce.fmaximum.*``' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
This is an overloaded intrinsic.
|
||||
|
||||
::
|
||||
|
||||
declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %a)
|
||||
declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %a)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.vector.reduce.fmaximum.*``' intrinsics do a floating-point
|
||||
``MAX`` reduction of a vector, returning the result as a scalar. The return type
|
||||
matches the element-type of the vector input.
|
||||
|
||||
This instruction has the same comparison semantics as the '``llvm.maximum.*``'
|
||||
intrinsic. That is, this intrinsic propagates NaNs and +0.0 is considered
|
||||
greater than -0.0. If any element of the vector is a NaN, the result is NaN.
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
The argument to this intrinsic must be a vector of floating-point values.
|
||||
|
||||
.. _int_vector_reduce_fminimum:
|
||||
|
||||
'``llvm.vector.reduce.fminimum.*``' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
This is an overloaded intrinsic.
|
||||
|
||||
::
|
||||
|
||||
declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %a)
|
||||
declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %a)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.vector.reduce.fminimum.*``' intrinsics do a floating-point
|
||||
``MIN`` reduction of a vector, returning the result as a scalar. The return type
|
||||
matches the element-type of the vector input.
|
||||
|
||||
This instruction has the same comparison semantics as the '``llvm.minimum.*``'
|
||||
intrinsic. That is, this intrinsic propagates NaNs and -0.0 is considered less
|
||||
than +0.0. If any element of the vector is a NaN, the result is NaN.
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
The argument to this intrinsic must be a vector of floating-point values.
|
||||
|
||||
'``llvm.vector.insert``' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
||||
@@ -1318,6 +1318,10 @@ enum NodeType {
|
||||
/// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
|
||||
VECREDUCE_FMAX,
|
||||
VECREDUCE_FMIN,
|
||||
/// FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the
|
||||
/// llvm.minimum and llvm.maximum semantics.
|
||||
VECREDUCE_FMAXIMUM,
|
||||
VECREDUCE_FMINIMUM,
|
||||
/// Integer reductions may have a result type larger than the vector element
|
||||
/// type. However, the reduction is performed using the vector element type
|
||||
/// and the value in the top bits is unspecified.
|
||||
|
||||
@@ -756,6 +756,16 @@ public:
|
||||
/// vector.
|
||||
CallInst *CreateFPMinReduce(Value *Src);
|
||||
|
||||
/// Create a vector float maximum reduction intrinsic of the source
|
||||
/// vector. This variant follows the NaN and signed zero semantics of
|
||||
/// the llvm.maximum intrinsic.
|
||||
CallInst *CreateFPMaximumReduce(Value *Src);
|
||||
|
||||
/// Create a vector float minimum reduction intrinsic of the source
|
||||
/// vector. This variant follows the NaN and signed zero semantics of
|
||||
/// the llvm.minimum intrinsic.
|
||||
CallInst *CreateFPMinimumReduce(Value *Src);
|
||||
|
||||
/// Create a lifetime.start intrinsic.
|
||||
///
|
||||
/// If the pointer isn't i8* it will be converted.
|
||||
|
||||
@@ -2323,6 +2323,10 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
|
||||
[llvm_anyvector_ty]>;
|
||||
def int_vector_reduce_fmin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
|
||||
[llvm_anyvector_ty]>;
|
||||
def int_vector_reduce_fminimum: DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
|
||||
[llvm_anyvector_ty]>;
|
||||
def int_vector_reduce_fmaximum: DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
|
||||
[llvm_anyvector_ty]>;
|
||||
}
|
||||
|
||||
//===----- Matrix intrinsics ---------------------------------------------===//
|
||||
|
||||
@@ -2015,7 +2015,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
|
||||
case ISD::VECREDUCE_UMAX:
|
||||
case ISD::VECREDUCE_UMIN:
|
||||
case ISD::VECREDUCE_FMAX:
|
||||
case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
|
||||
case ISD::VECREDUCE_FMIN:
|
||||
case ISD::VECREDUCE_FMAXIMUM:
|
||||
case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
|
||||
#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
|
||||
#include "llvm/IR/VPIntrinsics.def"
|
||||
return visitVPOp(N);
|
||||
|
||||
@@ -1205,6 +1205,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
|
||||
case ISD::VECREDUCE_UMIN:
|
||||
case ISD::VECREDUCE_FMAX:
|
||||
case ISD::VECREDUCE_FMIN:
|
||||
case ISD::VECREDUCE_FMAXIMUM:
|
||||
case ISD::VECREDUCE_FMINIMUM:
|
||||
case ISD::IS_FPCLASS:
|
||||
Action = TLI.getOperationAction(
|
||||
Node->getOpcode(), Node->getOperand(0).getValueType());
|
||||
@@ -4002,6 +4004,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
||||
case ISD::VECREDUCE_UMIN:
|
||||
case ISD::VECREDUCE_FMAX:
|
||||
case ISD::VECREDUCE_FMIN:
|
||||
case ISD::VECREDUCE_FMAXIMUM:
|
||||
case ISD::VECREDUCE_FMINIMUM:
|
||||
Results.push_back(TLI.expandVecReduce(Node, DAG));
|
||||
break;
|
||||
case ISD::GLOBAL_OFFSET_TABLE:
|
||||
|
||||
@@ -145,6 +145,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::VECREDUCE_FMUL:
|
||||
case ISD::VECREDUCE_FMIN:
|
||||
case ISD::VECREDUCE_FMAX:
|
||||
case ISD::VECREDUCE_FMAXIMUM:
|
||||
case ISD::VECREDUCE_FMINIMUM:
|
||||
R = SoftenFloatRes_VECREDUCE(N);
|
||||
break;
|
||||
case ISD::VECREDUCE_SEQ_FADD:
|
||||
@@ -2339,6 +2341,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::VECREDUCE_FMUL:
|
||||
case ISD::VECREDUCE_FMIN:
|
||||
case ISD::VECREDUCE_FMAX:
|
||||
case ISD::VECREDUCE_FMAXIMUM:
|
||||
case ISD::VECREDUCE_FMINIMUM:
|
||||
R = PromoteFloatRes_VECREDUCE(N);
|
||||
break;
|
||||
case ISD::VECREDUCE_SEQ_FADD:
|
||||
@@ -2704,6 +2708,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::VECREDUCE_FMUL:
|
||||
case ISD::VECREDUCE_FMIN:
|
||||
case ISD::VECREDUCE_FMAX:
|
||||
case ISD::VECREDUCE_FMAXIMUM:
|
||||
case ISD::VECREDUCE_FMINIMUM:
|
||||
R = SoftPromoteHalfRes_VECREDUCE(N);
|
||||
break;
|
||||
case ISD::VECREDUCE_SEQ_FADD:
|
||||
|
||||
@@ -451,6 +451,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
|
||||
case ISD::VECREDUCE_FMUL:
|
||||
case ISD::VECREDUCE_FMAX:
|
||||
case ISD::VECREDUCE_FMIN:
|
||||
case ISD::VECREDUCE_FMAXIMUM:
|
||||
case ISD::VECREDUCE_FMINIMUM:
|
||||
Action = TLI.getOperationAction(Node->getOpcode(),
|
||||
Node->getOperand(0).getValueType());
|
||||
break;
|
||||
@@ -960,6 +962,8 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
|
||||
case ISD::VECREDUCE_FMUL:
|
||||
case ISD::VECREDUCE_FMAX:
|
||||
case ISD::VECREDUCE_FMIN:
|
||||
case ISD::VECREDUCE_FMAXIMUM:
|
||||
case ISD::VECREDUCE_FMINIMUM:
|
||||
Results.push_back(TLI.expandVecReduce(Node, DAG));
|
||||
return;
|
||||
case ISD::VECREDUCE_SEQ_FADD:
|
||||
|
||||
@@ -696,6 +696,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::VECREDUCE_UMIN:
|
||||
case ISD::VECREDUCE_FMAX:
|
||||
case ISD::VECREDUCE_FMIN:
|
||||
case ISD::VECREDUCE_FMAXIMUM:
|
||||
case ISD::VECREDUCE_FMINIMUM:
|
||||
Res = ScalarizeVecOp_VECREDUCE(N);
|
||||
break;
|
||||
case ISD::VECREDUCE_SEQ_FADD:
|
||||
@@ -2924,6 +2926,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::VECREDUCE_UMIN:
|
||||
case ISD::VECREDUCE_FMAX:
|
||||
case ISD::VECREDUCE_FMIN:
|
||||
case ISD::VECREDUCE_FMAXIMUM:
|
||||
case ISD::VECREDUCE_FMINIMUM:
|
||||
Res = SplitVecOp_VECREDUCE(N, OpNo);
|
||||
break;
|
||||
case ISD::VECREDUCE_SEQ_FADD:
|
||||
@@ -5921,6 +5925,8 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::VECREDUCE_UMIN:
|
||||
case ISD::VECREDUCE_FMAX:
|
||||
case ISD::VECREDUCE_FMIN:
|
||||
case ISD::VECREDUCE_FMAXIMUM:
|
||||
case ISD::VECREDUCE_FMINIMUM:
|
||||
Res = WidenVecOp_VECREDUCE(N);
|
||||
break;
|
||||
case ISD::VECREDUCE_SEQ_FADD:
|
||||
|
||||
@@ -455,6 +455,10 @@ ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
|
||||
case ISD::VECREDUCE_FMIN:
|
||||
case ISD::VP_REDUCE_FMIN:
|
||||
return ISD::FMINNUM;
|
||||
case ISD::VECREDUCE_FMAXIMUM:
|
||||
return ISD::FMAXIMUM;
|
||||
case ISD::VECREDUCE_FMINIMUM:
|
||||
return ISD::FMINIMUM;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12393,6 +12397,18 @@ SDValue SelectionDAG::getNeutralElement(unsigned Opcode, const SDLoc &DL,
|
||||
|
||||
return getConstantFP(NeutralAF, DL, VT);
|
||||
}
|
||||
case ISD::FMINIMUM:
|
||||
case ISD::FMAXIMUM: {
|
||||
// Neutral element for fminimum is Inf or FLT_MAX (negated for fmaximum), depending on FMF.
|
||||
const fltSemantics &Semantics = EVTToAPFloatSemantics(VT);
|
||||
APFloat NeutralAF = !Flags.hasNoInfs() ? APFloat::getInf(Semantics)
|
||||
: APFloat::getLargest(Semantics);
|
||||
if (Opcode == ISD::FMAXIMUM)
|
||||
NeutralAF.changeSign();
|
||||
|
||||
return getConstantFP(NeutralAF, DL, VT);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -33,6 +33,7 @@
|
||||
#include "llvm/CodeGen/CodeGenCommonISel.h"
|
||||
#include "llvm/CodeGen/FunctionLoweringInfo.h"
|
||||
#include "llvm/CodeGen/GCMetadata.h"
|
||||
#include "llvm/CodeGen/ISDOpcodes.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
@@ -7289,6 +7290,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
|
||||
case Intrinsic::vector_reduce_umin:
|
||||
case Intrinsic::vector_reduce_fmax:
|
||||
case Intrinsic::vector_reduce_fmin:
|
||||
case Intrinsic::vector_reduce_fmaximum:
|
||||
case Intrinsic::vector_reduce_fminimum:
|
||||
visitVectorReduce(I, Intrinsic);
|
||||
return;
|
||||
|
||||
@@ -10010,6 +10013,12 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
|
||||
case Intrinsic::vector_reduce_fmin:
|
||||
Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
|
||||
break;
|
||||
case Intrinsic::vector_reduce_fmaximum:
|
||||
Res = DAG.getNode(ISD::VECREDUCE_FMAXIMUM, dl, VT, Op1, SDFlags);
|
||||
break;
|
||||
case Intrinsic::vector_reduce_fminimum:
|
||||
Res = DAG.getNode(ISD::VECREDUCE_FMINIMUM, dl, VT, Op1, SDFlags);
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("Unhandled vector reduce intrinsic");
|
||||
}
|
||||
|
||||
@@ -500,6 +500,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
|
||||
case ISD::VECREDUCE_UMIN: return "vecreduce_umin";
|
||||
case ISD::VECREDUCE_FMAX: return "vecreduce_fmax";
|
||||
case ISD::VECREDUCE_FMIN: return "vecreduce_fmin";
|
||||
case ISD::VECREDUCE_FMAXIMUM: return "vecreduce_fmaximum";
|
||||
case ISD::VECREDUCE_FMINIMUM: return "vecreduce_fminimum";
|
||||
case ISD::STACKMAP:
|
||||
return "stackmap";
|
||||
case ISD::PATCHPOINT:
|
||||
|
||||
@@ -882,7 +882,8 @@ void TargetLoweringBase::initActions() {
|
||||
ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
|
||||
ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
|
||||
ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_FMAX,
|
||||
ISD::VECREDUCE_FMIN, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL},
|
||||
ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAXIMUM, ISD::VECREDUCE_FMINIMUM,
|
||||
ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL},
|
||||
VT, Expand);
|
||||
|
||||
// Named vector shuffles default to expand.
|
||||
|
||||
@@ -482,6 +482,14 @@ CallInst *IRBuilderBase::CreateFPMinReduce(Value *Src) {
|
||||
return getReductionIntrinsic(Intrinsic::vector_reduce_fmin, Src);
|
||||
}
|
||||
|
||||
// Builds the vector float maximum reduction (llvm.maximum NaN/signed-zero
// semantics) by forwarding Src to getReductionIntrinsic with
// Intrinsic::vector_reduce_fmaximum and returning the call it produces.
CallInst *IRBuilderBase::CreateFPMaximumReduce(Value *Src) {
|
||||
return getReductionIntrinsic(Intrinsic::vector_reduce_fmaximum, Src);
|
||||
}
|
||||
|
||||
// Builds the vector float minimum reduction (llvm.minimum NaN/signed-zero
// semantics) by forwarding Src to getReductionIntrinsic with
// Intrinsic::vector_reduce_fminimum and returning the call it produces.
CallInst *IRBuilderBase::CreateFPMinimumReduce(Value *Src) {
|
||||
return getReductionIntrinsic(Intrinsic::vector_reduce_fminimum, Src);
|
||||
}
|
||||
|
||||
CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) {
|
||||
assert(isa<PointerType>(Ptr->getType()) &&
|
||||
"lifetime.start only applies to pointers.");
|
||||
|
||||
224
llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
Normal file
224
llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
Normal file
@@ -0,0 +1,224 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP
|
||||
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
|
||||
|
||||
declare half @llvm.vector.reduce.fmaximum.v1f16(<1 x half> %a)
|
||||
declare float @llvm.vector.reduce.fmaximum.v1f32(<1 x float> %a)
|
||||
declare double @llvm.vector.reduce.fmaximum.v1f64(<1 x double> %a)
|
||||
declare fp128 @llvm.vector.reduce.fmaximum.v1f128(<1 x fp128> %a)
|
||||
|
||||
declare half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> %a)
|
||||
declare half @llvm.vector.reduce.fmaximum.v11f16(<11 x half> %a)
|
||||
declare float @llvm.vector.reduce.fmaximum.v3f32(<3 x float> %a)
|
||||
declare fp128 @llvm.vector.reduce.fmaximum.v2f128(<2 x fp128> %a)
|
||||
declare float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> %a)
|
||||
|
||||
define half @test_v1f16(<1 x half> %a) nounwind {
|
||||
; CHECK-LABEL: test_v1f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ret
|
||||
%b = call half @llvm.vector.reduce.fmaximum.v1f16(<1 x half> %a)
|
||||
ret half %b
|
||||
}
|
||||
|
||||
define float @test_v1f32(<1 x float> %a) nounwind {
|
||||
; CHECK-LABEL: test_v1f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
|
||||
; CHECK-NEXT: ret
|
||||
%b = call float @llvm.vector.reduce.fmaximum.v1f32(<1 x float> %a)
|
||||
ret float %b
|
||||
}
|
||||
|
||||
define double @test_v1f64(<1 x double> %a) nounwind {
|
||||
; CHECK-LABEL: test_v1f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ret
|
||||
%b = call double @llvm.vector.reduce.fmaximum.v1f64(<1 x double> %a)
|
||||
ret double %b
|
||||
}
|
||||
|
||||
define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
|
||||
; CHECK-LABEL: test_v1f128:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ret
|
||||
%b = call fp128 @llvm.vector.reduce.fmaximum.v1f128(<1 x fp128> %a)
|
||||
ret fp128 %b
|
||||
}
|
||||
|
||||
define half @test_v4f16(<4 x half> %a) nounwind {
|
||||
; CHECK-NOFP-LABEL: test_v4f16:
|
||||
; CHECK-NOFP: // %bb.0:
|
||||
; CHECK-NOFP-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NOFP-NEXT: mov h1, v0.h[1]
|
||||
; CHECK-NOFP-NEXT: fcvt s2, h0
|
||||
; CHECK-NOFP-NEXT: fcvt s1, h1
|
||||
; CHECK-NOFP-NEXT: fmax s1, s2, s1
|
||||
; CHECK-NOFP-NEXT: mov h2, v0.h[2]
|
||||
; CHECK-NOFP-NEXT: mov h0, v0.h[3]
|
||||
; CHECK-NOFP-NEXT: fcvt h1, s1
|
||||
; CHECK-NOFP-NEXT: fcvt s2, h2
|
||||
; CHECK-NOFP-NEXT: fcvt s0, h0
|
||||
; CHECK-NOFP-NEXT: fcvt s1, h1
|
||||
; CHECK-NOFP-NEXT: fmax s1, s1, s2
|
||||
; CHECK-NOFP-NEXT: fcvt h1, s1
|
||||
; CHECK-NOFP-NEXT: fcvt s1, h1
|
||||
; CHECK-NOFP-NEXT: fmax s0, s1, s0
|
||||
; CHECK-NOFP-NEXT: fcvt h0, s0
|
||||
; CHECK-NOFP-NEXT: ret
|
||||
;
|
||||
; CHECK-FP-LABEL: test_v4f16:
|
||||
; CHECK-FP: // %bb.0:
|
||||
; CHECK-FP-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-FP-NEXT: mov h1, v0.h[1]
|
||||
; CHECK-FP-NEXT: mov h2, v0.h[2]
|
||||
; CHECK-FP-NEXT: fmax h1, h0, h1
|
||||
; CHECK-FP-NEXT: mov h0, v0.h[3]
|
||||
; CHECK-FP-NEXT: fmax h1, h1, h2
|
||||
; CHECK-FP-NEXT: fmax h0, h1, h0
|
||||
; CHECK-FP-NEXT: ret
|
||||
%b = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> %a)
|
||||
ret half %b
|
||||
}
|
||||
|
||||
define half @test_v11f16(<11 x half> %a) nounwind {
|
||||
; CHECK-NOFP-LABEL: test_v11f16:
|
||||
; CHECK-NOFP: // %bb.0:
|
||||
; CHECK-NOFP-NEXT: ldr h16, [sp, #8]
|
||||
; CHECK-NOFP-NEXT: fcvt s1, h1
|
||||
; CHECK-NOFP-NEXT: ldr h17, [sp]
|
||||
; CHECK-NOFP-NEXT: fcvt s0, h0
|
||||
; CHECK-NOFP-NEXT: fcvt s2, h2
|
||||
; CHECK-NOFP-NEXT: fcvt s16, h16
|
||||
; CHECK-NOFP-NEXT: fcvt s17, h17
|
||||
; CHECK-NOFP-NEXT: fmax s1, s1, s16
|
||||
; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
|
||||
; CHECK-NOFP-NEXT: fmax s0, s0, s17
|
||||
; CHECK-NOFP-NEXT: fcvt s16, h16
|
||||
; CHECK-NOFP-NEXT: fcvt h1, s1
|
||||
; CHECK-NOFP-NEXT: fcvt h0, s0
|
||||
; CHECK-NOFP-NEXT: fcvt s1, h1
|
||||
; CHECK-NOFP-NEXT: fcvt s0, h0
|
||||
; CHECK-NOFP-NEXT: fmax s0, s0, s1
|
||||
; CHECK-NOFP-NEXT: fmax s1, s2, s16
|
||||
; CHECK-NOFP-NEXT: fcvt h0, s0
|
||||
; CHECK-NOFP-NEXT: fcvt h1, s1
|
||||
; CHECK-NOFP-NEXT: fcvt s0, h0
|
||||
; CHECK-NOFP-NEXT: fcvt s1, h1
|
||||
; CHECK-NOFP-NEXT: fmax s0, s0, s1
|
||||
; CHECK-NOFP-NEXT: fcvt s1, h3
|
||||
; CHECK-NOFP-NEXT: fcvt h0, s0
|
||||
; CHECK-NOFP-NEXT: fcvt s0, h0
|
||||
; CHECK-NOFP-NEXT: fmax s0, s0, s1
|
||||
; CHECK-NOFP-NEXT: fcvt s1, h4
|
||||
; CHECK-NOFP-NEXT: fcvt h0, s0
|
||||
; CHECK-NOFP-NEXT: fcvt s0, h0
|
||||
; CHECK-NOFP-NEXT: fmax s0, s0, s1
|
||||
; CHECK-NOFP-NEXT: fcvt s1, h5
|
||||
; CHECK-NOFP-NEXT: fcvt h0, s0
|
||||
; CHECK-NOFP-NEXT: fcvt s0, h0
|
||||
; CHECK-NOFP-NEXT: fmax s0, s0, s1
|
||||
; CHECK-NOFP-NEXT: fcvt s1, h6
|
||||
; CHECK-NOFP-NEXT: fcvt h0, s0
|
||||
; CHECK-NOFP-NEXT: fcvt s0, h0
|
||||
; CHECK-NOFP-NEXT: fmax s0, s0, s1
|
||||
; CHECK-NOFP-NEXT: fcvt s1, h7
|
||||
; CHECK-NOFP-NEXT: fcvt h0, s0
|
||||
; CHECK-NOFP-NEXT: fcvt s0, h0
|
||||
; CHECK-NOFP-NEXT: fmax s0, s0, s1
|
||||
; CHECK-NOFP-NEXT: fcvt h0, s0
|
||||
; CHECK-NOFP-NEXT: ret
|
||||
;
|
||||
; CHECK-FP-LABEL: test_v11f16:
|
||||
; CHECK-FP: // %bb.0:
|
||||
; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0
|
||||
; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1
|
||||
; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2
|
||||
; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3
|
||||
; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4
|
||||
; CHECK-FP-NEXT: mov x8, sp
|
||||
; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5
|
||||
; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6
|
||||
; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7
|
||||
; CHECK-FP-NEXT: mov v0.h[1], v1.h[0]
|
||||
; CHECK-FP-NEXT: movi v1.8h, #252, lsl #8
|
||||
; CHECK-FP-NEXT: mov v0.h[2], v2.h[0]
|
||||
; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8]
|
||||
; CHECK-FP-NEXT: add x8, sp, #8
|
||||
; CHECK-FP-NEXT: mov v0.h[3], v3.h[0]
|
||||
; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8]
|
||||
; CHECK-FP-NEXT: add x8, sp, #16
|
||||
; CHECK-FP-NEXT: mov v0.h[4], v4.h[0]
|
||||
; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8]
|
||||
; CHECK-FP-NEXT: mov v0.h[5], v5.h[0]
|
||||
; CHECK-FP-NEXT: mov v0.h[6], v6.h[0]
|
||||
; CHECK-FP-NEXT: mov v0.h[7], v7.h[0]
|
||||
; CHECK-FP-NEXT: fmax v0.8h, v0.8h, v1.8h
|
||||
; CHECK-FP-NEXT: ext v1.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-FP-NEXT: fmax v0.4h, v0.4h, v1.4h
|
||||
; CHECK-FP-NEXT: mov h1, v0.h[1]
|
||||
; CHECK-FP-NEXT: mov h2, v0.h[2]
|
||||
; CHECK-FP-NEXT: fmax h1, h0, h1
|
||||
; CHECK-FP-NEXT: mov h0, v0.h[3]
|
||||
; CHECK-FP-NEXT: fmax h1, h1, h2
|
||||
; CHECK-FP-NEXT: fmax h0, h1, h0
|
||||
; CHECK-FP-NEXT: ret
|
||||
%b = call half @llvm.vector.reduce.fmaximum.v11f16(<11 x half> %a)
|
||||
ret half %b
|
||||
}
|
||||
|
||||
; Neutral element is negative infinity which is chosen for padding the widened
|
||||
; vector.
|
||||
define float @test_v3f32(<3 x float> %a) nounwind {
|
||||
; CHECK-LABEL: test_v3f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #-8388608 // =0xff800000
|
||||
; CHECK-NEXT: fmov s1, w8
|
||||
; CHECK-NEXT: mov v0.s[3], v1.s[0]
|
||||
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-NEXT: fmax v0.2s, v0.2s, v1.2s
|
||||
; CHECK-NEXT: mov s1, v0.s[1]
|
||||
; CHECK-NEXT: fmax s0, s0, s1
|
||||
; CHECK-NEXT: ret
|
||||
%b = call float @llvm.vector.reduce.fmaximum.v3f32(<3 x float> %a)
|
||||
ret float %b
|
||||
}
|
||||
|
||||
; With ninf, the neutral element chosen for padding the widened vector is the most negative finite value rather than negative infinity.
|
||||
define float @test_v3f32_ninf(<3 x float> %a) nounwind {
|
||||
; CHECK-LABEL: test_v3f32_ninf:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #-8388609 // =0xff7fffff
|
||||
; CHECK-NEXT: fmov s1, w8
|
||||
; CHECK-NEXT: mov v0.s[3], v1.s[0]
|
||||
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-NEXT: fmax v0.2s, v0.2s, v1.2s
|
||||
; CHECK-NEXT: mov s1, v0.s[1]
|
||||
; CHECK-NEXT: fmax s0, s0, s1
|
||||
; CHECK-NEXT: ret
|
||||
%b = call ninf float @llvm.vector.reduce.fmaximum.v3f32(<3 x float> %a)
|
||||
ret float %b
|
||||
}
|
||||
|
||||
; Cannot legalize f128. See PR63267 - The underlying fmaximum has no default
|
||||
; expansion and no libcalls.
|
||||
;define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
|
||||
; %b = call fp128 @llvm.vector.reduce.fmaximum.v2f128(<2 x fp128> %a)
|
||||
; ret fp128 %b
|
||||
;}
|
||||
|
||||
define float @test_v16f32(<16 x float> %a) nounwind {
|
||||
; CHECK-LABEL: test_v16f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fmax v1.4s, v1.4s, v3.4s
|
||||
; CHECK-NEXT: fmax v0.4s, v0.4s, v2.4s
|
||||
; CHECK-NEXT: fmax v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-NEXT: fmax v0.2s, v0.2s, v1.2s
|
||||
; CHECK-NEXT: mov s1, v0.s[1]
|
||||
; CHECK-NEXT: fmax s0, s0, s1
|
||||
; CHECK-NEXT: ret
|
||||
%b = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> %a)
|
||||
ret float %b
|
||||
}
|
||||
1794
llvm/test/CodeGen/X86/vector-reduce-fmaximum.ll
Normal file
1794
llvm/test/CodeGen/X86/vector-reduce-fmaximum.ll
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user