[LV] Add support for minimum/maximum intrinsics

{mini|maxi}mum intrinsics differ from {min|max}num intrinsics in how they
handle NaN and signed zeros. Also, the minnum/maxnum intrinsics require the
nnan and nsz flags to be valid reductions in the vectorizer, whereas the
minimum/maximum intrinsics do not. To support this, we introduce new
recurrence kinds (FMinimum and FMaximum) and add support for identifying
reduction patterns using these intrinsics.

The reduction intrinsics and their lowering were introduced in 26bfbec5d2.

The added tests show how this interacts across chains of min/max
patterns.

Differential Revision: https://reviews.llvm.org/D151482
This commit is contained in:
Anna Thomas
2023-06-13 14:41:23 -04:00
parent 0cb977dda1
commit ec146cb7c0
4 changed files with 165 additions and 7 deletions

View File

@@ -47,6 +47,8 @@ enum class RecurKind {
FMul, ///< Product of floats.
FMin, ///< FP min implemented in terms of select(cmp()).
FMax, ///< FP max implemented in terms of select(cmp()).
FMinimum, ///< FP min with llvm.minimum semantics
FMaximum, ///< FP max with llvm.maximum semantics
FMulAdd, ///< Fused multiply-add of floats (a * b + c).
SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop
///< invariant
@@ -223,7 +225,8 @@ public:
/// Returns true if the recurrence kind is a floating-point min/max kind.
static bool isFPMinMaxRecurrenceKind(RecurKind Kind) {
// NOTE(review): two consecutive return statements appear below. This looks
// like a diff rendered without +/- markers: the first return (FMin/FMax only)
// is presumably the line being replaced, and the second return additionally
// accepts FMinimum/FMaximum. Confirm against the actual header.
return Kind == RecurKind::FMin || Kind == RecurKind::FMax;
return Kind == RecurKind::FMin || Kind == RecurKind::FMax ||
Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum;
}
/// Returns true if the recurrence kind is any min/max kind.

View File

@@ -706,6 +706,10 @@ RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
return InstDesc(Kind == RecurKind::FMin, I);
if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMax, I);
if (match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMinimum, I);
if (match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMaximum, I);
return InstDesc(false, I);
}
@@ -801,11 +805,18 @@ RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
case Instruction::Call:
if (isSelectCmpRecurrenceKind(Kind))
return isSelectCmpPattern(L, OrigPhi, I, Prev);
// Returns true when I satisfies the fast-math precondition for being treated
// as an FP min/max recurrence step. FuncFMF and I are captured by reference
// from the enclosing scope.
auto HasRequiredFMF = [&]() {
// FuncFMF (presumably the function-level fast-math flags) already provides
// both nnan and nsz.
if (FuncFMF.noNaNs() && FuncFMF.noSignedZeros())
return true;
// Otherwise accept I if it is an FP math operator carrying both nnan and nsz.
if (isa<FPMathOperator>(I) && I->hasNoNaNs() && I->hasNoSignedZeros())
return true;
// minimum and maximum intrinsics do not require nsz and nnan flags since
// NaN and signed zeroes are propagated in the intrinsic implementation.
return match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())) ||
match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value()));
};
if (isIntMinMaxRecurrenceKind(Kind) ||
(((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
(isa<FPMathOperator>(I) && I->hasNoNaNs() &&
I->hasNoSignedZeros())) &&
isFPMinMaxRecurrenceKind(Kind)))
(HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))
return isMinMaxPattern(I, Kind, Prev);
else if (isFMulAddIntrinsic(I))
return InstDesc(Kind == RecurKind::FMulAdd, I,
@@ -923,6 +934,16 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::FMaximum, TheLoop, FMF, RedDes, DB, AC, DT,
SE)) {
LLVM_DEBUG(dbgs() << "Found a float MAXIMUM reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::FMinimum, TheLoop, FMF, RedDes, DB, AC, DT,
SE)) {
LLVM_DEBUG(dbgs() << "Found a float MINIMUM reduction PHI." << *Phi << "\n");
return true;
}
// Not a reduction of known type.
return false;
}
@@ -1063,6 +1084,10 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
assert((FMF.noNaNs() && FMF.noSignedZeros()) &&
"nnan, nsz is expected to be set for FP max reduction.");
return ConstantFP::getInfinity(Tp, true /*Negative*/);
case RecurKind::FMinimum:
return ConstantFP::getInfinity(Tp, false /*Negative*/);
case RecurKind::FMaximum:
return ConstantFP::getInfinity(Tp, true /*Negative*/);
case RecurKind::SelectICmp:
case RecurKind::SelectFCmp:
return getRecurrenceStartValue();
@@ -1097,6 +1122,8 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
return Instruction::ICmp;
case RecurKind::FMax:
case RecurKind::FMin:
case RecurKind::FMaximum:
case RecurKind::FMinimum:
case RecurKind::SelectFCmp:
return Instruction::FCmp;
default:

View File

@@ -909,6 +909,10 @@ Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
return Intrinsic::minnum;
case RecurKind::FMax:
return Intrinsic::maxnum;
case RecurKind::FMinimum:
return Intrinsic::minimum;
case RecurKind::FMaximum:
return Intrinsic::maximum;
}
}
@@ -928,6 +932,9 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
return CmpInst::FCMP_OLT;
case RecurKind::FMax:
return CmpInst::FCMP_OGT;
// We do not add FMinimum/FMaximum recurrence kind here since there is no
// equivalent predicate which compares signed zeroes according to the
// semantics of the intrinsics (llvm.minimum/maximum).
}
}
@@ -943,7 +950,8 @@ Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal,
Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
Value *Right) {
Type *Ty = Left->getType();
if (Ty->isIntOrIntVectorTy()) {
if (Ty->isIntOrIntVectorTy() ||
(RK == RecurKind::FMinimum || RK == RecurKind::FMaximum)) {
// TODO: Add float minnum/maxnum support when FMF nnan is set.
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
@@ -1094,6 +1102,10 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
return Builder.CreateFPMaxReduce(Src);
case RecurKind::FMin:
return Builder.CreateFPMinReduce(Src);
case RecurKind::FMinimum:
return Builder.CreateFPMinimumReduce(Src);
case RecurKind::FMaximum:
return Builder.CreateFPMaximumReduce(Src);
default:
llvm_unreachable("Unhandled opcode");
}

View File

@@ -1,4 +1,4 @@
; RUN: opt -S -passes=loop-vectorize,dce -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s
; RUN: opt -S -passes=loop-vectorize,dce -force-vector-width=2 -force-vector-interleave=2 < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -1090,6 +1090,120 @@ for.body: ; preds = %entry, %for.body
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
; CHECK-LABEL: fmaximum_intrinsic
; CHECK-LABEL: vector.body:
; CHECK: call <2 x float> @llvm.maximum.v2f32
; CHECK: call <2 x float> @llvm.maximum.v2f32
; CHECK-LABEL: middle.block:
; CHECK: call <2 x float> @llvm.maximum.v2f32
; CHECK: call float @llvm.vector.reduce.fmaximum.v2f32
; Scalar loop that folds 1024 floats loaded from %x into a running value with
; llvm.maximum, starting from 0.0, and returns the final value. The intrinsic
; call carries no fast-math flags.
define float @fmaximum_intrinsic(ptr nocapture readonly %x) {
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret float %1
for.body: ; preds = %entry, %for.body
; %i.012 is the induction variable; %s.011 is the running maximum.
%i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
%arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012
%0 = load float, ptr %arrayidx, align 4
%1 = tail call float @llvm.maximum.f32(float %s.011, float %0)
%inc = add nuw nsw i32 %i.012, 1
; Exit once the incremented index reaches 1024.
%exitcond.not = icmp eq i32 %inc, 1024
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
; CHECK-LABEL: fminimum_intrinsic
; CHECK-LABEL: vector.body:
; CHECK: call <2 x float> @llvm.minimum.v2f32
; CHECK: call <2 x float> @llvm.minimum.v2f32
; CHECK-LABEL: middle.block:
; CHECK: call <2 x float> @llvm.minimum.v2f32
; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
; Scalar loop that folds 1024 floats loaded from %x into a running value with
; llvm.minimum, starting from 0.0, and returns the final value. The intrinsic
; call carries no fast-math flags.
define float @fminimum_intrinsic(ptr nocapture readonly %x) {
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret float %1
for.body: ; preds = %entry, %for.body
; %i.012 is the induction variable; %s.011 is the running minimum.
%i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
%arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012
%0 = load float, ptr %arrayidx, align 4
%1 = tail call float @llvm.minimum.f32(float %s.011, float %0)
%inc = add nuw nsw i32 %i.012, 1
; Exit once the incremented index reaches 1024.
%exitcond.not = icmp eq i32 %inc, 1024
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
; CHECK-LABEL: fminimum_fminimum
; CHECK-LABEL: vector.body:
; CHECK: call <2 x float> @llvm.minimum.v2f32
; CHECK: call <2 x float> @llvm.minimum.v2f32
; CHECK: call <2 x float> @llvm.minimum.v2f32
; CHECK: call <2 x float> @llvm.minimum.v2f32
; CHECK-LABEL: middle.block:
; CHECK: call <2 x float> @llvm.minimum.v2f32
; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
; Scalar loop with a chain of two llvm.minimum calls per iteration: the running
; value is first combined with an element of %x, then with an element of %y.
; Runs 1024 iterations starting from 0.0; neither call has fast-math flags.
define float @fminimum_fminimum(ptr nocapture readonly %x, ptr nocapture readonly %y) {
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret float %cond9
for.body: ; preds = %entry, %for.body
; %i.025 is the induction variable; %s.011 is the running minimum.
%i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ]
%arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025
%0 = load float, ptr %arrayidx, align 4
; First link of the chain: running value vs. x[i].
%s.0. = tail call float @llvm.minimum.f32(float %s.011, float %0)
%arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025
%1 = load float, ptr %arrayidx3, align 4
; Second link of the chain: previous result vs. y[i]; feeds the phi.
%cond9 = tail call float @llvm.minimum.f32(float %s.0., float %1)
%inc = add nuw nsw i32 %i.025, 1
; Exit once the incremented index reaches 1024.
%exitcond.not = icmp eq i32 %inc, 1024
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
; CHECK-LABEL: fminimum_fminimum_one_with_flags
; CHECK-LABEL: vector.body:
; CHECK: call nnan nsz <2 x float> @llvm.minimum.v2f32
; CHECK: call nnan nsz <2 x float> @llvm.minimum.v2f32
; CHECK: call <2 x float> @llvm.minimum.v2f32
; CHECK: call <2 x float> @llvm.minimum.v2f32
; CHECK-LABEL: middle.block:
; CHECK: call <2 x float> @llvm.minimum.v2f32
; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
; Same two-call llvm.minimum chain over %x and %y as a 1024-iteration loop
; starting from 0.0, except that only the first call carries nnan nsz; the
; second call has no fast-math flags.
define float @fminimum_fminimum_one_with_flags(ptr nocapture readonly %x, ptr nocapture readonly %y) {
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret float %cond9
for.body: ; preds = %entry, %for.body
; %i.025 is the induction variable; %s.011 is the running minimum.
%i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ]
%arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025
%0 = load float, ptr %arrayidx, align 4
; First link of the chain: flagged with nnan nsz.
%s.0. = tail call nnan nsz float @llvm.minimum.f32(float %s.011, float %0)
%arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025
%1 = load float, ptr %arrayidx3, align 4
; Second link of the chain: no fast-math flags; feeds the phi.
%cond9 = tail call float @llvm.minimum.f32(float %s.0., float %1)
%inc = add nuw nsw i32 %i.025, 1
; Exit once the incremented index reaches 1024.
%exitcond.not = icmp eq i32 %inc, 1024
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
; Make sure any check-not directives are not triggered by function declarations.
; CHECK: declare
@@ -1099,6 +1213,8 @@ declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare float @llvm.minnum.f32(float, float)
declare float @llvm.maxnum.f32(float, float)
declare float @llvm.minimum.f32(float, float)
declare float @llvm.maximum.f32(float, float)
attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
attributes #1 = { "no-nans-fp-math"="true" }