[LV] Add support for minimum/maximum intrinsics
The {mini|maxi}mum intrinsics differ from the {min|max}num intrinsics in
how they propagate NaN and signed zero. Also, the minnum/maxnum
intrinsics require the presence of the nnan and nsz flags to be valid
reductions in the vectorizer. For this reason, we introduce two new recurrence
kinds (FMinimum and FMaximum) and add support for identifying reduction patterns that use these intrinsics.
The reduction intrinsics and their lowering were introduced in commit 26bfbec5d2.
Tests are added that show how these reductions interact across chains of
min/max patterns.
Differential Revision: https://reviews.llvm.org/D151482
This commit is contained in:
@@ -47,6 +47,8 @@ enum class RecurKind {
|
||||
FMul, ///< Product of floats.
|
||||
FMin, ///< FP min implemented in terms of select(cmp()).
|
||||
FMax, ///< FP max implemented in terms of select(cmp()).
|
||||
FMinimum, ///< FP min with llvm.minimum semantics
|
||||
FMaximum, ///< FP max with llvm.maximum semantics
|
||||
FMulAdd, ///< Fused multiply-add of floats (a * b + c).
|
||||
SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop
|
||||
///< invariant
|
||||
@@ -223,7 +225,8 @@ public:
|
||||
|
||||
/// Returns true if the recurrence kind is a floating-point min/max kind.
|
||||
static bool isFPMinMaxRecurrenceKind(RecurKind Kind) {
|
||||
return Kind == RecurKind::FMin || Kind == RecurKind::FMax;
|
||||
return Kind == RecurKind::FMin || Kind == RecurKind::FMax ||
|
||||
Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum;
|
||||
}
|
||||
|
||||
/// Returns true if the recurrence kind is any min/max kind.
|
||||
|
||||
@@ -706,6 +706,10 @@ RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
|
||||
return InstDesc(Kind == RecurKind::FMin, I);
|
||||
if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
|
||||
return InstDesc(Kind == RecurKind::FMax, I);
|
||||
if (match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())))
|
||||
return InstDesc(Kind == RecurKind::FMinimum, I);
|
||||
if (match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value())))
|
||||
return InstDesc(Kind == RecurKind::FMaximum, I);
|
||||
|
||||
return InstDesc(false, I);
|
||||
}
|
||||
@@ -801,11 +805,18 @@ RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
|
||||
case Instruction::Call:
|
||||
if (isSelectCmpRecurrenceKind(Kind))
|
||||
return isSelectCmpPattern(L, OrigPhi, I, Prev);
|
||||
auto HasRequiredFMF = [&]() {
|
||||
if (FuncFMF.noNaNs() && FuncFMF.noSignedZeros())
|
||||
return true;
|
||||
if (isa<FPMathOperator>(I) && I->hasNoNaNs() && I->hasNoSignedZeros())
|
||||
return true;
|
||||
// minimum and maximum intrinsics do not require nsz and nnan flags since
|
||||
// NaN and signed zeroes are propagated in the intrinsic implementation.
|
||||
return match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())) ||
|
||||
match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value()));
|
||||
};
|
||||
if (isIntMinMaxRecurrenceKind(Kind) ||
|
||||
(((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
|
||||
(isa<FPMathOperator>(I) && I->hasNoNaNs() &&
|
||||
I->hasNoSignedZeros())) &&
|
||||
isFPMinMaxRecurrenceKind(Kind)))
|
||||
(HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))
|
||||
return isMinMaxPattern(I, Kind, Prev);
|
||||
else if (isFMulAddIntrinsic(I))
|
||||
return InstDesc(Kind == RecurKind::FMulAdd, I,
|
||||
@@ -923,6 +934,16 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
|
||||
LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
|
||||
return true;
|
||||
}
|
||||
if (AddReductionVar(Phi, RecurKind::FMaximum, TheLoop, FMF, RedDes, DB, AC, DT,
|
||||
SE)) {
|
||||
LLVM_DEBUG(dbgs() << "Found a float MAXIMUM reduction PHI." << *Phi << "\n");
|
||||
return true;
|
||||
}
|
||||
if (AddReductionVar(Phi, RecurKind::FMinimum, TheLoop, FMF, RedDes, DB, AC, DT,
|
||||
SE)) {
|
||||
LLVM_DEBUG(dbgs() << "Found a float MINIMUM reduction PHI." << *Phi << "\n");
|
||||
return true;
|
||||
}
|
||||
// Not a reduction of known type.
|
||||
return false;
|
||||
}
|
||||
@@ -1063,6 +1084,10 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
|
||||
assert((FMF.noNaNs() && FMF.noSignedZeros()) &&
|
||||
"nnan, nsz is expected to be set for FP max reduction.");
|
||||
return ConstantFP::getInfinity(Tp, true /*Negative*/);
|
||||
case RecurKind::FMinimum:
|
||||
return ConstantFP::getInfinity(Tp, false /*Negative*/);
|
||||
case RecurKind::FMaximum:
|
||||
return ConstantFP::getInfinity(Tp, true /*Negative*/);
|
||||
case RecurKind::SelectICmp:
|
||||
case RecurKind::SelectFCmp:
|
||||
return getRecurrenceStartValue();
|
||||
@@ -1097,6 +1122,8 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
|
||||
return Instruction::ICmp;
|
||||
case RecurKind::FMax:
|
||||
case RecurKind::FMin:
|
||||
case RecurKind::FMaximum:
|
||||
case RecurKind::FMinimum:
|
||||
case RecurKind::SelectFCmp:
|
||||
return Instruction::FCmp;
|
||||
default:
|
||||
|
||||
@@ -909,6 +909,10 @@ Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
|
||||
return Intrinsic::minnum;
|
||||
case RecurKind::FMax:
|
||||
return Intrinsic::maxnum;
|
||||
case RecurKind::FMinimum:
|
||||
return Intrinsic::minimum;
|
||||
case RecurKind::FMaximum:
|
||||
return Intrinsic::maximum;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -928,6 +932,9 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
|
||||
return CmpInst::FCMP_OLT;
|
||||
case RecurKind::FMax:
|
||||
return CmpInst::FCMP_OGT;
|
||||
// We do not add FMinimum/FMaximum recurrence kind here since there is no
|
||||
// equivalent predicate which compares signed zeroes according to the
|
||||
// semantics of the intrinsics (llvm.minimum/maximum).
|
||||
}
|
||||
}
|
||||
|
||||
@@ -943,7 +950,8 @@ Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal,
|
||||
Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
|
||||
Value *Right) {
|
||||
Type *Ty = Left->getType();
|
||||
if (Ty->isIntOrIntVectorTy()) {
|
||||
if (Ty->isIntOrIntVectorTy() ||
|
||||
(RK == RecurKind::FMinimum || RK == RecurKind::FMaximum)) {
|
||||
// TODO: Add float minnum/maxnum support when FMF nnan is set.
|
||||
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
|
||||
return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
|
||||
@@ -1094,6 +1102,10 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
|
||||
return Builder.CreateFPMaxReduce(Src);
|
||||
case RecurKind::FMin:
|
||||
return Builder.CreateFPMinReduce(Src);
|
||||
case RecurKind::FMinimum:
|
||||
return Builder.CreateFPMinimumReduce(Src);
|
||||
case RecurKind::FMaximum:
|
||||
return Builder.CreateFPMaximumReduce(Src);
|
||||
default:
|
||||
llvm_unreachable("Unhandled opcode");
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; RUN: opt -S -passes=loop-vectorize,dce -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s
|
||||
; RUN: opt -S -passes=loop-vectorize,dce -force-vector-width=2 -force-vector-interleave=2 < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
|
||||
@@ -1090,6 +1090,120 @@ for.body: ; preds = %entry, %for.body
|
||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
||||
}
|
||||
|
||||
; CHECK-LABEL: fmaximum_intrinsic
|
||||
; CHECK-LABEL: vector.body:
|
||||
; CHECK: call <2 x float> @llvm.maximum.v2f32
|
||||
; CHECK: call <2 x float> @llvm.maximum.v2f32
|
||||
|
||||
; CHECK-LABEL: middle.block:
|
||||
; CHECK: call <2 x float> @llvm.maximum.v2f32
|
||||
; CHECK: call float @llvm.vector.reduce.fmaximum.v2f32
|
||||
define float @fmaximum_intrinsic(ptr nocapture readonly %x) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body
|
||||
ret float %1
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
|
||||
%arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012
|
||||
%0 = load float, ptr %arrayidx, align 4
|
||||
%1 = tail call float @llvm.maximum.f32(float %s.011, float %0)
|
||||
%inc = add nuw nsw i32 %i.012, 1
|
||||
%exitcond.not = icmp eq i32 %inc, 1024
|
||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
||||
}
|
||||
|
||||
; CHECK-LABEL: fminimum_intrinsic
|
||||
; CHECK-LABEL: vector.body:
|
||||
; CHECK: call <2 x float> @llvm.minimum.v2f32
|
||||
; CHECK: call <2 x float> @llvm.minimum.v2f32
|
||||
|
||||
; CHECK-LABEL: middle.block:
|
||||
; CHECK: call <2 x float> @llvm.minimum.v2f32
|
||||
; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
|
||||
define float @fminimum_intrinsic(ptr nocapture readonly %x) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body
|
||||
ret float %1
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
|
||||
%arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012
|
||||
%0 = load float, ptr %arrayidx, align 4
|
||||
%1 = tail call float @llvm.minimum.f32(float %s.011, float %0)
|
||||
%inc = add nuw nsw i32 %i.012, 1
|
||||
%exitcond.not = icmp eq i32 %inc, 1024
|
||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
||||
}
|
||||
|
||||
; CHECK-LABEL: fminimum_fminimum
|
||||
; CHECK-LABEL: vector.body:
|
||||
; CHECK: call <2 x float> @llvm.minimum.v2f32
|
||||
; CHECK: call <2 x float> @llvm.minimum.v2f32
|
||||
; CHECK: call <2 x float> @llvm.minimum.v2f32
|
||||
; CHECK: call <2 x float> @llvm.minimum.v2f32
|
||||
|
||||
; CHECK-LABEL: middle.block:
|
||||
; CHECK: call <2 x float> @llvm.minimum.v2f32
|
||||
; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
|
||||
define float @fminimum_fminimum(ptr nocapture readonly %x, ptr nocapture readonly %y) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body
|
||||
ret float %cond9
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ]
|
||||
%arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025
|
||||
%0 = load float, ptr %arrayidx, align 4
|
||||
%s.0. = tail call float @llvm.minimum.f32(float %s.011, float %0)
|
||||
%arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025
|
||||
%1 = load float, ptr %arrayidx3, align 4
|
||||
%cond9 = tail call float @llvm.minimum.f32(float %s.0., float %1)
|
||||
%inc = add nuw nsw i32 %i.025, 1
|
||||
%exitcond.not = icmp eq i32 %inc, 1024
|
||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
||||
}
|
||||
|
||||
; CHECK-LABEL: fminimum_fminimum_one_with_flags
|
||||
; CHECK-LABEL: vector.body:
|
||||
; CHECK: call nnan nsz <2 x float> @llvm.minimum.v2f32
|
||||
; CHECK: call nnan nsz <2 x float> @llvm.minimum.v2f32
|
||||
; CHECK: call <2 x float> @llvm.minimum.v2f32
|
||||
; CHECK: call <2 x float> @llvm.minimum.v2f32
|
||||
|
||||
; CHECK-LABEL: middle.block:
|
||||
; CHECK: call <2 x float> @llvm.minimum.v2f32
|
||||
; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
|
||||
define float @fminimum_fminimum_one_with_flags(ptr nocapture readonly %x, ptr nocapture readonly %y) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body
|
||||
ret float %cond9
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ]
|
||||
%arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025
|
||||
%0 = load float, ptr %arrayidx, align 4
|
||||
%s.0. = tail call nnan nsz float @llvm.minimum.f32(float %s.011, float %0)
|
||||
%arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025
|
||||
%1 = load float, ptr %arrayidx3, align 4
|
||||
%cond9 = tail call float @llvm.minimum.f32(float %s.0., float %1)
|
||||
%inc = add nuw nsw i32 %i.025, 1
|
||||
%exitcond.not = icmp eq i32 %inc, 1024
|
||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
||||
}
|
||||
|
||||
; Make sure any check-not directives are not triggered by function declarations.
|
||||
; CHECK: declare
|
||||
|
||||
@@ -1099,6 +1213,8 @@ declare i32 @llvm.umin.i32(i32, i32)
|
||||
declare i32 @llvm.umax.i32(i32, i32)
|
||||
declare float @llvm.minnum.f32(float, float)
|
||||
declare float @llvm.maxnum.f32(float, float)
|
||||
declare float @llvm.minimum.f32(float, float)
|
||||
declare float @llvm.maximum.f32(float, float)
|
||||
|
||||
attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
|
||||
attributes #1 = { "no-nans-fp-math"="true" }
|
||||
|
||||
Reference in New Issue
Block a user