[LV] Add support for minimum/maximum intrinsics

The {mini|maxi}mum intrinsics differ from the {min|max}num intrinsics in how they handle NaN and signed zero. Also, the minnum/maxnum intrinsics require the nnan and nsz flags to be present in order to be valid reductions in the vectorizer. To support the {mini|maxi}mum intrinsics, we introduce new recurrence kinds (FMinimum and FMaximum) and add support for identifying reduction patterns that use these intrinsics. The reduction intrinsics and their lowering were introduced in 26bfbec5d2. Tests are added that show how this interacts across chains of min/max patterns. Differential Revision: https://reviews.llvm.org/D151482
2023-06-13 14:41:23 -04:00
parent 0cb977dda1
commit ec146cb7c0
4 changed files with 165 additions and 7 deletions
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -47,6 +47,8 @@ enum class RecurKind {
  FMul,       ///< Product of floats.
  FMin,       ///< FP min implemented in terms of select(cmp()).
  FMax,       ///< FP max implemented in terms of select(cmp()).
+  FMinimum,   ///< FP min with llvm.minimum semantics
+  FMaximum,   ///< FP max with llvm.maximum semantics
  FMulAdd,    ///< Fused multiply-add of floats (a * b + c).
  SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop
              ///< invariant
@@ -223,7 +225,8 @@ public:

  /// Returns true if the recurrence kind is a floating-point min/max kind.
  static bool isFPMinMaxRecurrenceKind(RecurKind Kind) {
-    return Kind == RecurKind::FMin || Kind == RecurKind::FMax;
+    return Kind == RecurKind::FMin || Kind == RecurKind::FMax ||
+           Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum;
  }

  /// Returns true if the recurrence kind is any min/max kind.
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -706,6 +706,10 @@ RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
    return InstDesc(Kind == RecurKind::FMin, I);
  if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
    return InstDesc(Kind == RecurKind::FMax, I);
+  if (match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())))
+    return InstDesc(Kind == RecurKind::FMinimum, I);
+  if (match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value())))
+    return InstDesc(Kind == RecurKind::FMaximum, I);

  return InstDesc(false, I);
 }
@@ -801,11 +805,18 @@ RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
  case Instruction::Call:
    if (isSelectCmpRecurrenceKind(Kind))
      return isSelectCmpPattern(L, OrigPhi, I, Prev);
+    auto HasRequiredFMF = [&]() {
+     if (FuncFMF.noNaNs() && FuncFMF.noSignedZeros())
+       return true;
+     if (isa<FPMathOperator>(I) && I->hasNoNaNs() && I->hasNoSignedZeros())
+       return true;
+     // The minimum and maximum intrinsics need neither nnan nor nsz: they
+     // propagate NaN and treat -0.0 as less than +0.0 by definition.
+     return match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())) ||
+            match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value()));
+    };
    if (isIntMinMaxRecurrenceKind(Kind) ||
-        (((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
-          (isa<FPMathOperator>(I) && I->hasNoNaNs() &&
-           I->hasNoSignedZeros())) &&
-         isFPMinMaxRecurrenceKind(Kind)))
+        (HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))
      return isMinMaxPattern(I, Kind, Prev);
    else if (isFMulAddIntrinsic(I))
      return InstDesc(Kind == RecurKind::FMulAdd, I,
@@ -923,6 +934,16 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
    LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
    return true;
  }
+  if (AddReductionVar(Phi, RecurKind::FMaximum, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
+    LLVM_DEBUG(dbgs() << "Found a float MAXIMUM reduction PHI." << *Phi << "\n");
+    return true;
+  }
+  if (AddReductionVar(Phi, RecurKind::FMinimum, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
+    LLVM_DEBUG(dbgs() << "Found a float MINIMUM reduction PHI." << *Phi << "\n");
+    return true;
+  }
  // Not a reduction of known type.
  return false;
 }
@@ -1063,6 +1084,10 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
    assert((FMF.noNaNs() && FMF.noSignedZeros()) &&
           "nnan, nsz is expected to be set for FP max reduction.");
    return ConstantFP::getInfinity(Tp, true /*Negative*/);
+  case RecurKind::FMinimum:
+    return ConstantFP::getInfinity(Tp, false /*Negative*/);
+  case RecurKind::FMaximum:
+    return ConstantFP::getInfinity(Tp, true /*Negative*/);
  case RecurKind::SelectICmp:
  case RecurKind::SelectFCmp:
    return getRecurrenceStartValue();
@@ -1097,6 +1122,8 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
    return Instruction::ICmp;
  case RecurKind::FMax:
  case RecurKind::FMin:
+  case RecurKind::FMaximum:
+  case RecurKind::FMinimum:
  case RecurKind::SelectFCmp:
    return Instruction::FCmp;
  default:
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -909,6 +909,10 @@ Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
    return Intrinsic::minnum;
  case RecurKind::FMax:
    return Intrinsic::maxnum;
+  case RecurKind::FMinimum:
+    return Intrinsic::minimum;
+  case RecurKind::FMaximum:
+    return Intrinsic::maximum;
  }
 }

@@ -928,6 +932,9 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
    return CmpInst::FCMP_OLT;
  case RecurKind::FMax:
    return CmpInst::FCMP_OGT;
+  // The FMinimum/FMaximum recurrence kinds are deliberately not handled here:
+  // no compare predicate orders signed zeroes the way the llvm.minimum and
+  // llvm.maximum intrinsics do.
  }
 }

@@ -943,7 +950,8 @@ Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal,
 Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
                            Value *Right) {
  Type *Ty = Left->getType();
-  if (Ty->isIntOrIntVectorTy()) {
+  if (Ty->isIntOrIntVectorTy() ||
+      (RK == RecurKind::FMinimum || RK == RecurKind::FMaximum)) {
    // TODO: Add float minnum/maxnum support when FMF nnan is set.
    Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
    return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
@@ -1094,6 +1102,10 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
    return Builder.CreateFPMaxReduce(Src);
  case RecurKind::FMin:
    return Builder.CreateFPMinReduce(Src);
+  case RecurKind::FMinimum:
+    return Builder.CreateFPMinimumReduce(Src);
+  case RecurKind::FMaximum:
+    return Builder.CreateFPMaximumReduce(Src);
  default:
    llvm_unreachable("Unhandled opcode");
  }
--- a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -passes=loop-vectorize,dce -force-vector-width=2 -force-vector-interleave=1  < %s | FileCheck %s
+; RUN: opt -S -passes=loop-vectorize,dce -force-vector-width=2 -force-vector-interleave=2  < %s | FileCheck %s

 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

@@ -1090,6 +1090,120 @@ for.body:                                         ; preds = %entry, %for.body
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }

+; CHECK-LABEL: fmaximum_intrinsic
+; CHECK-LABEL: vector.body:
+; CHECK: call <2 x float> @llvm.maximum.v2f32
+; CHECK: call <2 x float> @llvm.maximum.v2f32
+
+; CHECK-LABEL: middle.block:
+; CHECK: call <2 x float> @llvm.maximum.v2f32
+; CHECK: call float @llvm.vector.reduce.fmaximum.v2f32
+define float @fmaximum_intrinsic(ptr nocapture readonly %x) {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret float %1
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012
+  %0 = load float, ptr %arrayidx, align 4
+  %1 = tail call float @llvm.maximum.f32(float %s.011, float %0)
+  %inc = add nuw nsw i32 %i.012, 1
+  %exitcond.not = icmp eq i32 %inc, 1024
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: fminimum_intrinsic
+; CHECK-LABEL: vector.body:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+
+; CHECK-LABEL: middle.block:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
+define float @fminimum_intrinsic(ptr nocapture readonly %x) {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret float %1
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012
+  %0 = load float, ptr %arrayidx, align 4
+  %1 = tail call float @llvm.minimum.f32(float %s.011, float %0)
+  %inc = add nuw nsw i32 %i.012, 1
+  %exitcond.not = icmp eq i32 %inc, 1024
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: fminimum_fminimum
+; CHECK-LABEL: vector.body:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+
+; CHECK-LABEL: middle.block:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
+define float @fminimum_fminimum(ptr nocapture readonly %x, ptr nocapture readonly %y) {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret float %cond9
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025
+  %0 = load float, ptr %arrayidx, align 4
+  %s.0. = tail call float @llvm.minimum.f32(float %s.011, float %0)
+  %arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025
+  %1 = load float, ptr %arrayidx3, align 4
+  %cond9 = tail call float @llvm.minimum.f32(float %s.0., float %1)
+  %inc = add nuw nsw i32 %i.025, 1
+  %exitcond.not = icmp eq i32 %inc, 1024
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: fminimum_fminimum_one_with_flags
+; CHECK-LABEL: vector.body:
+; CHECK: call nnan nsz <2 x float> @llvm.minimum.v2f32
+; CHECK: call nnan nsz <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+
+; CHECK-LABEL: middle.block:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
+define float @fminimum_fminimum_one_with_flags(ptr nocapture readonly %x, ptr nocapture readonly %y) {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret float %cond9
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025
+  %0 = load float, ptr %arrayidx, align 4
+  %s.0. = tail call nnan nsz float @llvm.minimum.f32(float %s.011, float %0)
+  %arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025
+  %1 = load float, ptr %arrayidx3, align 4
+  %cond9 = tail call float @llvm.minimum.f32(float %s.0., float %1)
+  %inc = add nuw nsw i32 %i.025, 1
+  %exitcond.not = icmp eq i32 %inc, 1024
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
 ; Make sure any check-not directives are not triggered by function declarations.
 ; CHECK: declare

@@ -1099,6 +1213,8 @@ declare i32 @llvm.umin.i32(i32, i32)
 declare i32 @llvm.umax.i32(i32, i32)
 declare float @llvm.minnum.f32(float, float)
 declare float @llvm.maxnum.f32(float, float)
+declare float @llvm.minimum.f32(float, float)
+declare float @llvm.maximum.f32(float, float)

 attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
 attributes #1 = { "no-nans-fp-math"="true" }