[flang] Add PPC vec_max, vec_min, vec_madd and vec_nmsub intrinsics
Differential Revision: https://reviews.llvm.org/D152938
This commit is contained in:
@@ -481,8 +481,19 @@ enum class ParamTypeId {
|
||||
Integer,
|
||||
Real,
|
||||
Complex,
|
||||
IntegerVector,
|
||||
UnsignedVector,
|
||||
RealVector,
|
||||
};
|
||||
|
||||
// Helper function to get length of a 16-byte vector of element type eleTy.
|
||||
static int getVecLen(mlir::Type eleTy) {
|
||||
assert((mlir::isa<mlir::IntegerType>(eleTy) ||
|
||||
mlir::isa<mlir::FloatType>(eleTy)) &&
|
||||
"unsupported vector element type");
|
||||
return 16 / (eleTy.getIntOrFloatBitWidth() / 8);
|
||||
}
|
||||
|
||||
template <ParamTypeId t, int k>
|
||||
struct ParamType {
|
||||
// Supported kinds can be checked with static asserts at compile time.
|
||||
@@ -509,6 +520,12 @@ template <int k>
|
||||
using Integer = ParamType<ParamTypeId::Integer, k>;
|
||||
template <int k>
|
||||
using Complex = ParamType<ParamTypeId::Complex, k>;
|
||||
template <int k>
|
||||
using IntegerVector = ParamType<ParamTypeId::IntegerVector, k>;
|
||||
template <int k>
|
||||
using RealVector = ParamType<ParamTypeId::RealVector, k>;
|
||||
template <int k>
|
||||
using UnsignedVector = ParamType<ParamTypeId::UnsignedVector, k>;
|
||||
} // namespace Ty
|
||||
|
||||
// Helper function that generates most types that are supported for intrinsic
|
||||
@@ -518,24 +535,46 @@ static inline mlir::Type getTypeHelper(mlir::MLIRContext *context,
|
||||
fir::FirOpBuilder &builder,
|
||||
ParamTypeId typeId, int kind) {
|
||||
mlir::Type r;
|
||||
int bits = 0;
|
||||
unsigned bits{0};
|
||||
switch (typeId) {
|
||||
case ParamTypeId::Void:
|
||||
llvm::report_fatal_error("can not get type of void");
|
||||
break;
|
||||
case ParamTypeId::Integer:
|
||||
case ParamTypeId::IntegerVector:
|
||||
bits = builder.getKindMap().getIntegerBitsize(kind);
|
||||
assert(bits != 0 && "failed to convert kind to integer bitsize");
|
||||
r = mlir::IntegerType::get(context, bits);
|
||||
break;
|
||||
case ParamTypeId::UnsignedVector:
|
||||
bits = builder.getKindMap().getIntegerBitsize(kind);
|
||||
assert(bits != 0 && "failed to convert kind to unsigned bitsize");
|
||||
r = mlir::IntegerType::get(context, bits, mlir::IntegerType::Unsigned);
|
||||
break;
|
||||
case ParamTypeId::Real:
|
||||
case ParamTypeId::RealVector:
|
||||
r = builder.getRealType(kind);
|
||||
break;
|
||||
case ParamTypeId::Complex:
|
||||
r = fir::ComplexType::get(context, kind);
|
||||
break;
|
||||
}
|
||||
return r;
|
||||
|
||||
switch (typeId) {
|
||||
case ParamTypeId::Void:
|
||||
case ParamTypeId::Integer:
|
||||
case ParamTypeId::Real:
|
||||
case ParamTypeId::Complex:
|
||||
// keep original type for void and non-vector
|
||||
return r;
|
||||
break;
|
||||
case ParamTypeId::IntegerVector:
|
||||
case ParamTypeId::UnsignedVector:
|
||||
case ParamTypeId::RealVector:
|
||||
// convert to FIR vector type
|
||||
return fir::VectorType::get(getVecLen(r), r);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Generic function type generator that supports most of the function types
|
||||
|
||||
@@ -116,6 +116,98 @@ static constexpr MathOperation ppcMathOperations[] = {
|
||||
genLibCall},
|
||||
{"__ppc_frsqrtes", "llvm.ppc.frsqrtes",
|
||||
genFuncType<Ty::Real<4>, Ty::Real<4>>, genLibCall},
|
||||
{"__ppc_vec_madd", "llvm.fma.v4f32",
|
||||
genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>,
|
||||
Ty::RealVector<4>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_madd", "llvm.fma.v2f64",
|
||||
genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>,
|
||||
Ty::RealVector<8>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_max", "llvm.ppc.altivec.vmaxsb",
|
||||
genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
|
||||
Ty::IntegerVector<1>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_max", "llvm.ppc.altivec.vmaxsh",
|
||||
genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
|
||||
Ty::IntegerVector<2>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_max", "llvm.ppc.altivec.vmaxsw",
|
||||
genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
|
||||
Ty::IntegerVector<4>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_max", "llvm.ppc.altivec.vmaxsd",
|
||||
genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
|
||||
Ty::IntegerVector<8>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_max", "llvm.ppc.altivec.vmaxub",
|
||||
genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>,
|
||||
Ty::UnsignedVector<1>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_max", "llvm.ppc.altivec.vmaxuh",
|
||||
genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>,
|
||||
Ty::UnsignedVector<2>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_max", "llvm.ppc.altivec.vmaxuw",
|
||||
genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>,
|
||||
Ty::UnsignedVector<4>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_max", "llvm.ppc.altivec.vmaxud",
|
||||
genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>,
|
||||
Ty::UnsignedVector<8>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_max", "llvm.ppc.vsx.xvmaxsp",
|
||||
genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_max", "llvm.ppc.vsx.xvmaxdp",
|
||||
genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_min", "llvm.ppc.altivec.vminsb",
|
||||
genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
|
||||
Ty::IntegerVector<1>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_min", "llvm.ppc.altivec.vminsh",
|
||||
genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
|
||||
Ty::IntegerVector<2>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_min", "llvm.ppc.altivec.vminsw",
|
||||
genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
|
||||
Ty::IntegerVector<4>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_min", "llvm.ppc.altivec.vminsd",
|
||||
genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
|
||||
Ty::IntegerVector<8>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_min", "llvm.ppc.altivec.vminub",
|
||||
genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>,
|
||||
Ty::UnsignedVector<1>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_min", "llvm.ppc.altivec.vminuh",
|
||||
genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>,
|
||||
Ty::UnsignedVector<2>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_min", "llvm.ppc.altivec.vminuw",
|
||||
genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>,
|
||||
Ty::UnsignedVector<4>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_min", "llvm.ppc.altivec.vminud",
|
||||
genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>,
|
||||
Ty::UnsignedVector<8>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_min", "llvm.ppc.vsx.xvminsp",
|
||||
genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_min", "llvm.ppc.vsx.xvmindp",
|
||||
genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_nmsub", "llvm.ppc.fnmsub.v4f32",
|
||||
genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>,
|
||||
Ty::RealVector<4>>,
|
||||
genLibCall},
|
||||
{"__ppc_vec_nmsub", "llvm.ppc.fnmsub.v2f64",
|
||||
genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>,
|
||||
Ty::RealVector<8>>,
|
||||
genLibCall},
|
||||
};
|
||||
|
||||
const IntrinsicHandler *findPPCIntrinsicHandler(llvm::StringRef name) {
|
||||
|
||||
@@ -49,6 +49,17 @@ module __ppc_intrinsics
|
||||
#undef ELEM_FUNC_VUVUVU
|
||||
#undef ELEM_FUNC_VIVIVI
|
||||
|
||||
!! ================ 3-argument function interface ================
|
||||
! vector(r) function f(vector(r), vector(r), vector(r))
|
||||
#define ELEM_FUNC_VRVRVRVR(VKIND) \
|
||||
elemental vector(real(VKIND)) function elem_func_vr##VKIND##vr##VKIND##vr##VKIND##vr##VKIND(arg1, arg2, arg3); \
|
||||
vector(real(VKIND)), intent(in) :: arg1, arg2, arg3; \
|
||||
end function ;
|
||||
|
||||
ELEM_FUNC_VRVRVRVR(4) ELEM_FUNC_VRVRVRVR(8)
|
||||
|
||||
#undef ELEM_FUNC_VRVRVRVR
|
||||
|
||||
end interface
|
||||
|
||||
procedure(func_r4r4r4r4) :: __ppc_fmadd_r4
|
||||
@@ -242,6 +253,28 @@ module __ppc_intrinsics
|
||||
end interface vec_and
|
||||
public :: vec_and
|
||||
|
||||
! vec_max
|
||||
VEC_VI_VI_VI(vec_max,1) VEC_VI_VI_VI(vec_max,2) VEC_VI_VI_VI(vec_max,4) VEC_VI_VI_VI(vec_max,8)
|
||||
VEC_VU_VU_VU(vec_max,1) VEC_VU_VU_VU(vec_max,2) VEC_VU_VU_VU(vec_max,4) VEC_VU_VU_VU(vec_max,8)
|
||||
VEC_VR_VR_VR(vec_max,4) VEC_VR_VR_VR(vec_max,8)
|
||||
interface vec_max
|
||||
procedure :: VI_VI_VI(vec_max,1), VI_VI_VI(vec_max,2), VI_VI_VI(vec_max,4), VI_VI_VI(vec_max,8)
|
||||
procedure :: VU_VU_VU(vec_max,1), VU_VU_VU(vec_max,2), VU_VU_VU(vec_max,4), VU_VU_VU(vec_max,8)
|
||||
procedure :: VR_VR_VR(vec_max,4), VR_VR_VR(vec_max,8)
|
||||
end interface vec_max
|
||||
public :: vec_max
|
||||
|
||||
! vec_min
|
||||
VEC_VI_VI_VI(vec_min,1) VEC_VI_VI_VI(vec_min,2) VEC_VI_VI_VI(vec_min,4) VEC_VI_VI_VI(vec_min,8)
|
||||
VEC_VU_VU_VU(vec_min,1) VEC_VU_VU_VU(vec_min,2) VEC_VU_VU_VU(vec_min,4) VEC_VU_VU_VU(vec_min,8)
|
||||
VEC_VR_VR_VR(vec_min,4) VEC_VR_VR_VR(vec_min,8)
|
||||
interface vec_min
|
||||
procedure :: VI_VI_VI(vec_min,1), VI_VI_VI(vec_min,2), VI_VI_VI(vec_min,4), VI_VI_VI(vec_min,8)
|
||||
procedure :: VU_VU_VU(vec_min,1), VU_VU_VU(vec_min,2), VU_VU_VU(vec_min,4), VU_VU_VU(vec_min,8)
|
||||
procedure :: VR_VR_VR(vec_min,4), VR_VR_VR(vec_min,8)
|
||||
end interface vec_min
|
||||
public :: vec_min
|
||||
|
||||
! vec_mul
|
||||
VEC_VI_VI_VI(vec_mul,1) VEC_VI_VI_VI(vec_mul,2) VEC_VI_VI_VI(vec_mul,4) VEC_VI_VI_VI(vec_mul,8)
|
||||
VEC_VU_VU_VU(vec_mul,1) VEC_VU_VU_VU(vec_mul,2) VEC_VU_VU_VU(vec_mul,4) VEC_VU_VU_VU(vec_mul,8)
|
||||
@@ -281,4 +314,30 @@ module __ppc_intrinsics
|
||||
#undef VR_VR_VR
|
||||
#undef VU_VU_VU
|
||||
#undef VI_VI_VI
|
||||
|
||||
!-----------------------------------------
|
||||
! vector function(vector, vector, vector)
|
||||
!-----------------------------------------
|
||||
#define VR_VR_VR_VR(NAME, VKIND) __ppc_##NAME##_vr##VKIND##vr##VKIND##vr##VKIND##vr##VKIND
|
||||
|
||||
#define VEC_VR_VR_VR_VR(NAME, VKIND) \
|
||||
procedure(elem_func_vr##VKIND##vr##VKIND##vr##VKIND##vr##VKIND) :: VR_VR_VR_VR(NAME, VKIND);
|
||||
|
||||
! vec_madd
|
||||
VEC_VR_VR_VR_VR(vec_madd,4) VEC_VR_VR_VR_VR(vec_madd,8)
|
||||
interface vec_madd
|
||||
procedure :: VR_VR_VR_VR(vec_madd,4), VR_VR_VR_VR(vec_madd,8)
|
||||
end interface vec_madd
|
||||
public :: vec_madd
|
||||
|
||||
! vec_nmsub
|
||||
VEC_VR_VR_VR_VR(vec_nmsub,4) VEC_VR_VR_VR_VR(vec_nmsub,8)
|
||||
interface vec_nmsub
|
||||
procedure :: VR_VR_VR_VR(vec_nmsub,4), VR_VR_VR_VR(vec_nmsub,8)
|
||||
end interface vec_nmsub
|
||||
public :: vec_nmsub
|
||||
|
||||
#undef VEC_VR_VR_VR_VR
|
||||
#undef VR_VR_VR_VR
|
||||
|
||||
end module __ppc_intrinsics
|
||||
|
||||
504
flang/test/Lower/PowerPC/ppc-vec_max-min-madd-nmsub.f90
Normal file
504
flang/test/Lower/PowerPC/ppc-vec_max-min-madd-nmsub.f90
Normal file
@@ -0,0 +1,504 @@
|
||||
! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s
|
||||
! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s
|
||||
! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
|
||||
! REQUIRES: target=powerpc{{.*}}
|
||||
|
||||
! vec_max
|
||||
|
||||
! CHECK-LABEL: vec_max_testf32
|
||||
subroutine vec_max_testf32(x, y)
|
||||
vector(real(4)) :: vmax, x, y
|
||||
vmax = vec_max(x, y)
|
||||
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
|
||||
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:f32>>
|
||||
! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.vsx.xvmaxsp(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<4:f32>, !fir.vector<4:f32>) -> !fir.vector<4:f32>
|
||||
! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
|
||||
|
||||
! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
|
||||
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xf32>>
|
||||
! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.vsx.xvmaxsp(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xf32>, vector<4xf32>) -> vector<4xf32>
|
||||
! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
|
||||
|
||||
! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[vmax:.*]] = call contract <4 x float> @llvm.ppc.vsx.xvmaxsp(<4 x float> %[[x]], <4 x float> %[[y]])
|
||||
! CHECK: store <4 x float> %[[vmax]], ptr %{{[0-9]}}, align 16
|
||||
end subroutine vec_max_testf32
|
||||
|
||||
! CHECK-LABEL: vec_max_testf64
|
||||
subroutine vec_max_testf64(x, y)
|
||||
vector(real(8)) :: vmax, x, y
|
||||
vmax = vec_max(x, y)
|
||||
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
|
||||
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:f64>>
|
||||
! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.vsx.xvmaxdp(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<2:f64>, !fir.vector<2:f64>) -> !fir.vector<2:f64>
|
||||
! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
|
||||
|
||||
! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
|
||||
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xf64>>
|
||||
! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.vsx.xvmaxdp(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xf64>, vector<2xf64>) -> vector<2xf64>
|
||||
! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
|
||||
|
||||
! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[vmax:.*]] = call contract <2 x double> @llvm.ppc.vsx.xvmaxdp(<2 x double> %[[x]], <2 x double> %[[y]])
|
||||
! CHECK: store <2 x double> %[[vmax]], ptr %{{[0-9]}}, align 16
|
||||
end subroutine vec_max_testf64
|
||||
|
||||
! CHECK-LABEL: vec_max_testi8
|
||||
subroutine vec_max_testi8(x, y)
|
||||
vector(integer(1)) :: vmax, x, y
|
||||
vmax = vec_max(x, y)
|
||||
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:i8>>
|
||||
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<16:i8>>
|
||||
! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.altivec.vmaxsb(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<16:i8>, !fir.vector<16:i8>) -> !fir.vector<16:i8>
|
||||
! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
|
||||
|
||||
! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
|
||||
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<16xi8>>
|
||||
! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.altivec.vmaxsb(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8>
|
||||
! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
|
||||
|
||||
! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[vmax:.*]] = call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %[[x]], <16 x i8> %[[y]])
|
||||
! CHECK: store <16 x i8> %[[vmax]], ptr %{{[0-9]}}, align 16
|
||||
end subroutine vec_max_testi8
|
||||
|
||||
! CHECK-LABEL: vec_max_testi16
|
||||
subroutine vec_max_testi16(x, y)
|
||||
vector(integer(2)) :: vmax, x, y
|
||||
vmax = vec_max(x, y)
|
||||
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
|
||||
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<8:i16>>
|
||||
! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.altivec.vmaxsh(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<8:i16>, !fir.vector<8:i16>) -> !fir.vector<8:i16>
|
||||
! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
|
||||
|
||||
! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
|
||||
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<8xi16>>
|
||||
! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.altivec.vmaxsh(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16>
|
||||
! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
|
||||
|
||||
! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[vmax:.*]] = call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %[[x]], <8 x i16> %[[y]])
|
||||
! CHECK: store <8 x i16> %[[vmax]], ptr %{{[0-9]}}, align 16
|
||||
end subroutine vec_max_testi16
|
||||
|
||||
! CHECK-LABEL: vec_max_testi32
|
||||
subroutine vec_max_testi32(x, y)
|
||||
vector(integer(4)) :: vmax, x, y
|
||||
vmax = vec_max(x, y)
|
||||
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
|
||||
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:i32>>
|
||||
! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.altivec.vmaxsw(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<4:i32>, !fir.vector<4:i32>) -> !fir.vector<4:i32>
|
||||
! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
|
||||
|
||||
! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
|
||||
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xi32>>
|
||||
! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.altivec.vmaxsw(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
|
||||
! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
|
||||
|
||||
! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[vmax:.*]] = call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %[[x]], <4 x i32> %[[y]])
|
||||
! CHECK: store <4 x i32> %[[vmax]], ptr %{{[0-9]}}, align 16
|
||||
end subroutine vec_max_testi32
|
||||
|
||||
! CHECK-LABEL: vec_max_testi64
|
||||
subroutine vec_max_testi64(x, y)
|
||||
vector(integer(8)) :: vmax, x, y
|
||||
vmax = vec_max(x, y)
|
||||
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:i64>>
|
||||
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:i64>>
|
||||
! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.altivec.vmaxsd(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<2:i64>, !fir.vector<2:i64>) -> !fir.vector<2:i64>
|
||||
! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>
|
||||
|
||||
! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
|
||||
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
|
||||
! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.altivec.vmaxsd(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64>
|
||||
! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
|
||||
|
||||
! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[vmax:.*]] = call <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64> %[[x]], <2 x i64> %[[y]])
|
||||
! CHECK: store <2 x i64> %[[vmax]], ptr %{{[0-9]}}, align 16
|
||||
end subroutine vec_max_testi64
|
||||
|
||||
! CHECK-LABEL: vec_max_testui8
|
||||
subroutine vec_max_testui8(x, y)
|
||||
vector(unsigned(1)) :: vmax, x, y
|
||||
vmax = vec_max(x, y)
|
||||
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:ui8>>
|
||||
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<16:ui8>>
|
||||
! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.altivec.vmaxub(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<16:ui8>, !fir.vector<16:ui8>) -> !fir.vector<16:ui8>
|
||||
! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:ui8>>
|
||||
|
||||
! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
|
||||
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<16xi8>>
|
||||
! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.altivec.vmaxub(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8>
|
||||
! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
|
||||
|
||||
! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[vmax:.*]] = call <16 x i8> @llvm.ppc.altivec.vmaxub(<16 x i8> %[[x]], <16 x i8> %[[y]])
|
||||
! CHECK: store <16 x i8> %[[vmax]], ptr %{{[0-9]}}, align 16
|
||||
end subroutine vec_max_testui8
|
||||
|
||||
! CHECK-LABEL: vec_max_testui16
|
||||
subroutine vec_max_testui16(x, y)
|
||||
vector(unsigned(2)) :: vmax, x, y
|
||||
vmax = vec_max(x, y)
|
||||
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:ui16>>
|
||||
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<8:ui16>>
|
||||
! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.altivec.vmaxuh(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<8:ui16>, !fir.vector<8:ui16>) -> !fir.vector<8:ui16>
|
||||
! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:ui16>>
|
||||
|
||||
! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
|
||||
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<8xi16>>
|
||||
! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.altivec.vmaxuh(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16>
|
||||
! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
|
||||
|
||||
! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[vmax:.*]] = call <8 x i16> @llvm.ppc.altivec.vmaxuh(<8 x i16> %[[x]], <8 x i16> %[[y]])
|
||||
! CHECK: store <8 x i16> %[[vmax]], ptr %{{[0-9]}}, align 16
|
||||
end subroutine vec_max_testui16
|
||||
|
||||
! CHECK-LABEL: vec_max_testui32
|
||||
subroutine vec_max_testui32(x, y)
|
||||
vector(unsigned(4)) :: vmax, x, y
|
||||
vmax = vec_max(x, y)
|
||||
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:ui32>>
|
||||
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:ui32>>
|
||||
! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.altivec.vmaxuw(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<4:ui32>, !fir.vector<4:ui32>) -> !fir.vector<4:ui32>
|
||||
! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:ui32>>
|
||||
|
||||
! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
|
||||
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xi32>>
|
||||
! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.altivec.vmaxuw(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
|
||||
! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
|
||||
|
||||
! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[vmax:.*]] = call <4 x i32> @llvm.ppc.altivec.vmaxuw(<4 x i32> %[[x]], <4 x i32> %[[y]])
|
||||
! CHECK: store <4 x i32> %[[vmax]], ptr %{{[0-9]}}, align 16
|
||||
end subroutine vec_max_testui32
|
||||
|
||||
! CHECK-LABEL: vec_max_testui64
|
||||
subroutine vec_max_testui64(x, y)
|
||||
vector(unsigned(8)) :: vmax, x, y
|
||||
vmax = vec_max(x, y)
|
||||
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:ui64>>
|
||||
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:ui64>>
|
||||
! CHECK-FIR: %[[vmax:.*]] = fir.call @llvm.ppc.altivec.vmaxud(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<2:ui64>, !fir.vector<2:ui64>) -> !fir.vector<2:ui64>
|
||||
! CHECK-FIR: fir.store %[[vmax]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:ui64>>
|
||||
|
||||
! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
|
||||
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
|
||||
! CHECK-LLVMIR: %[[vmax:.*]] = llvm.call @llvm.ppc.altivec.vmaxud(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64>
|
||||
! CHECK-LLVMIR: llvm.store %[[vmax]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>
|
||||
|
||||
! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[vmax:.*]] = call <2 x i64> @llvm.ppc.altivec.vmaxud(<2 x i64> %[[x]], <2 x i64> %[[y]])
|
||||
! CHECK: store <2 x i64> %[[vmax]], ptr %{{[0-9]}}, align 16
|
||||
end subroutine vec_max_testui64
|
||||
|
||||
! vec_min
|
||||
|
||||
! CHECK-LABEL: vec_min_testf32
|
||||
subroutine vec_min_testf32(x, y)
|
||||
vector(real(4)) :: vmin, x, y
|
||||
vmin = vec_min(x, y)
|
||||
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
|
||||
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:f32>>
|
||||
! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.vsx.xvminsp(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<4:f32>, !fir.vector<4:f32>) -> !fir.vector<4:f32>
|
||||
! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>
|
||||
|
||||
! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
|
||||
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xf32>>
|
||||
! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.vsx.xvminsp(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xf32>, vector<4xf32>) -> vector<4xf32>
|
||||
! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>
|
||||
|
||||
! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[vmin:.*]] = call contract <4 x float> @llvm.ppc.vsx.xvminsp(<4 x float> %[[x]], <4 x float> %[[y]])
|
||||
! CHECK: store <4 x float> %[[vmin]], ptr %{{[0-9]}}, align 16
|
||||
end subroutine vec_min_testf32
|
||||
|
||||
! CHECK-LABEL: vec_min_testf64
|
||||
subroutine vec_min_testf64(x, y)
|
||||
vector(real(8)) :: vmin, x, y
|
||||
vmin = vec_min(x, y)
|
||||
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
|
||||
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:f64>>
|
||||
! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.vsx.xvmindp(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<2:f64>, !fir.vector<2:f64>) -> !fir.vector<2:f64>
|
||||
! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>
|
||||
|
||||
! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
|
||||
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xf64>>
|
||||
! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.vsx.xvmindp(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xf64>, vector<2xf64>) -> vector<2xf64>
|
||||
! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>
|
||||
|
||||
! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[vmin:.*]] = call contract <2 x double> @llvm.ppc.vsx.xvmindp(<2 x double> %[[x]], <2 x double> %[[y]])
|
||||
! CHECK: store <2 x double> %[[vmin]], ptr %{{[0-9]}}, align 16
|
||||
end subroutine vec_min_testf64
|
||||
|
||||
! CHECK-LABEL: vec_min_testi8
|
||||
subroutine vec_min_testi8(x, y)
|
||||
vector(integer(1)) :: vmin, x, y
|
||||
vmin = vec_min(x, y)
|
||||
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:i8>>
|
||||
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<16:i8>>
|
||||
! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.altivec.vminsb(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<16:i8>, !fir.vector<16:i8>) -> !fir.vector<16:i8>
|
||||
! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:i8>>
|
||||
|
||||
! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
|
||||
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<16xi8>>
|
||||
! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.altivec.vminsb(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8>
|
||||
! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>
|
||||
|
||||
! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[vmin:.*]] = call <16 x i8> @llvm.ppc.altivec.vminsb(<16 x i8> %[[x]], <16 x i8> %[[y]])
|
||||
! CHECK: store <16 x i8> %[[vmin]], ptr %{{[0-9]}}, align 16
|
||||
end subroutine vec_min_testi8
|
||||
|
||||
! CHECK-LABEL: vec_min_testi16
|
||||
subroutine vec_min_testi16(x, y)
|
||||
vector(integer(2)) :: vmin, x, y
|
||||
vmin = vec_min(x, y)
|
||||
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
|
||||
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<8:i16>>
|
||||
! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.altivec.vminsh(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<8:i16>, !fir.vector<8:i16>) -> !fir.vector<8:i16>
|
||||
! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:i16>>
|
||||
|
||||
! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
|
||||
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<8xi16>>
|
||||
! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.altivec.vminsh(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16>
|
||||
! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>
|
||||
|
||||
! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[vmin:.*]] = call <8 x i16> @llvm.ppc.altivec.vminsh(<8 x i16> %[[x]], <8 x i16> %[[y]])
|
||||
! CHECK: store <8 x i16> %[[vmin]], ptr %{{[0-9]}}, align 16
|
||||
end subroutine vec_min_testi16
|
||||
|
||||
! CHECK-LABEL: vec_min_testi32
|
||||
subroutine vec_min_testi32(x, y)
|
||||
vector(integer(4)) :: vmin, x, y
|
||||
vmin = vec_min(x, y)
|
||||
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
|
||||
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:i32>>
|
||||
! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.altivec.vminsw(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<4:i32>, !fir.vector<4:i32>) -> !fir.vector<4:i32>
|
||||
! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:i32>>
|
||||
|
||||
! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
|
||||
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xi32>>
|
||||
! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.altivec.vminsw(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
|
||||
! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>
|
||||
|
||||
! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
|
||||
! CHECK: %[[vmin:.*]] = call <4 x i32> @llvm.ppc.altivec.vminsw(<4 x i32> %[[x]], <4 x i32> %[[y]])
|
||||
! CHECK: store <4 x i32> %[[vmin]], ptr %{{[0-9]}}, align 16
|
||||
end subroutine vec_min_testi32

! Checks lowering of vec_min on vector(integer(8)) to @llvm.ppc.altivec.vminsd.
! CHECK-LABEL: vec_min_testi64
subroutine vec_min_testi64(x, y)
  vector(integer(8)) :: vmin, x, y
  vmin = vec_min(x, y)
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:i64>>
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:i64>>
! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.altivec.vminsd(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<2:i64>, !fir.vector<2:i64>) -> !fir.vector<2:i64>
! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:i64>>

! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.altivec.vminsd(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64>
! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>

! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
! CHECK: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
! CHECK: %[[vmin:.*]] = call <2 x i64> @llvm.ppc.altivec.vminsd(<2 x i64> %[[x]], <2 x i64> %[[y]])
! CHECK: store <2 x i64> %[[vmin]], ptr %{{[0-9]}}, align 16
end subroutine vec_min_testi64

! Checks lowering of vec_min on vector(unsigned(1)) to @llvm.ppc.altivec.vminub.
! CHECK-LABEL: vec_min_testui8
subroutine vec_min_testui8(x, y)
  vector(unsigned(1)) :: vmin, x, y
  vmin = vec_min(x, y)
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<16:ui8>>
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<16:ui8>>
! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.altivec.vminub(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<16:ui8>, !fir.vector<16:ui8>) -> !fir.vector<16:ui8>
! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<16:ui8>>

! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<16xi8>>
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<16xi8>>
! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.altivec.vminub(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, vector<16xi8>) -> vector<16xi8>
! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<16xi8>>

! CHECK: %[[x:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
! CHECK: %[[y:.*]] = load <16 x i8>, ptr %{{[0-9]}}, align 16
! CHECK: %[[vmin:.*]] = call <16 x i8> @llvm.ppc.altivec.vminub(<16 x i8> %[[x]], <16 x i8> %[[y]])
! CHECK: store <16 x i8> %[[vmin]], ptr %{{[0-9]}}, align 16
end subroutine vec_min_testui8

! Checks lowering of vec_min on vector(unsigned(2)) to @llvm.ppc.altivec.vminuh.
! CHECK-LABEL: vec_min_testui16
subroutine vec_min_testui16(x, y)
  vector(unsigned(2)) :: vmin, x, y
  vmin = vec_min(x, y)
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:ui16>>
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<8:ui16>>
! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.altivec.vminuh(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<8:ui16>, !fir.vector<8:ui16>) -> !fir.vector<8:ui16>
! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<8:ui16>>

! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<8xi16>>
! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.altivec.vminuh(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, vector<8xi16>) -> vector<8xi16>
! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<8xi16>>

! CHECK: %[[x:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
! CHECK: %[[y:.*]] = load <8 x i16>, ptr %{{[0-9]}}, align 16
! CHECK: %[[vmin:.*]] = call <8 x i16> @llvm.ppc.altivec.vminuh(<8 x i16> %[[x]], <8 x i16> %[[y]])
! CHECK: store <8 x i16> %[[vmin]], ptr %{{[0-9]}}, align 16
end subroutine vec_min_testui16

! Checks lowering of vec_min on vector(unsigned(4)) to @llvm.ppc.altivec.vminuw.
! CHECK-LABEL: vec_min_testui32
subroutine vec_min_testui32(x, y)
  vector(unsigned(4)) :: vmin, x, y
  vmin = vec_min(x, y)
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:ui32>>
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:ui32>>
! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.altivec.vminuw(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<4:ui32>, !fir.vector<4:ui32>) -> !fir.vector<4:ui32>
! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:ui32>>

! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xi32>>
! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.altivec.vminuw(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, vector<4xi32>) -> vector<4xi32>
! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<4xi32>>

! CHECK: %[[x:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
! CHECK: %[[y:.*]] = load <4 x i32>, ptr %{{[0-9]}}, align 16
! CHECK: %[[vmin:.*]] = call <4 x i32> @llvm.ppc.altivec.vminuw(<4 x i32> %[[x]], <4 x i32> %[[y]])
! CHECK: store <4 x i32> %[[vmin]], ptr %{{[0-9]}}, align 16
end subroutine vec_min_testui32

! Checks lowering of vec_min on vector(unsigned(8)) to @llvm.ppc.altivec.vminud.
! CHECK-LABEL: vec_min_testui64
subroutine vec_min_testui64(x, y)
  vector(unsigned(8)) :: vmin, x, y
  vmin = vec_min(x, y)
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:ui64>>
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:ui64>>
! CHECK-FIR: %[[vmin:.*]] = fir.call @llvm.ppc.altivec.vminud(%[[x]], %[[y]]) fastmath<contract> : (!fir.vector<2:ui64>, !fir.vector<2:ui64>) -> !fir.vector<2:ui64>
! CHECK-FIR: fir.store %[[vmin]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:ui64>>

! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xi64>>
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xi64>>
! CHECK-LLVMIR: %[[vmin:.*]] = llvm.call @llvm.ppc.altivec.vminud(%[[x]], %[[y]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xi64>, vector<2xi64>) -> vector<2xi64>
! CHECK-LLVMIR: llvm.store %[[vmin]], %{{[0-9]}} : !llvm.ptr<vector<2xi64>>

! CHECK: %[[x:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
! CHECK: %[[y:.*]] = load <2 x i64>, ptr %{{[0-9]}}, align 16
! CHECK: %[[vmin:.*]] = call <2 x i64> @llvm.ppc.altivec.vminud(<2 x i64> %[[x]], <2 x i64> %[[y]])
! CHECK: store <2 x i64> %[[vmin]], ptr %{{[0-9]}}, align 16
end subroutine vec_min_testui64

! vec_madd

! Checks lowering of vec_madd on vector(real(4)) to @llvm.fma.v4f32.
! CHECK-LABEL: vec_madd_testf32
subroutine vec_madd_testf32(x, y, z)
  vector(real(4)) :: vmsum, x, y, z
  vmsum = vec_madd(x, y, z)
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:f32>>
! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref<!fir.vector<4:f32>>
! CHECK-FIR: %[[vmsum:.*]] = fir.call @llvm.fma.v4f32(%[[x]], %[[y]], %[[z]]) fastmath<contract> : (!fir.vector<4:f32>, !fir.vector<4:f32>, !fir.vector<4:f32>) -> !fir.vector<4:f32>
! CHECK-FIR: fir.store %[[vmsum]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>

! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xf32>>
! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr<vector<4xf32>>
! CHECK-LLVMIR: %[[vmsum:.*]] = llvm.call @llvm.fma.v4f32(%[[x]], %[[y]], %[[z]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32>
! CHECK-LLVMIR: llvm.store %[[vmsum]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>

! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
! CHECK: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
! CHECK: %[[z:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
! CHECK: %[[vmsum:.*]] = call contract <4 x float> @llvm.fma.v4f32(<4 x float> %[[x]], <4 x float> %[[y]], <4 x float> %[[z]])
! CHECK: store <4 x float> %[[vmsum]], ptr %{{[0-9]}}, align 16
end subroutine vec_madd_testf32

! Checks lowering of vec_madd on vector(real(8)) to @llvm.fma.v2f64.
! CHECK-LABEL: vec_madd_testf64
subroutine vec_madd_testf64(x, y, z)
  vector(real(8)) :: vmsum, x, y, z
  vmsum = vec_madd(x, y, z)
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:f64>>
! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref<!fir.vector<2:f64>>
! CHECK-FIR: %[[vmsum:.*]] = fir.call @llvm.fma.v2f64(%[[x]], %[[y]], %[[z]]) fastmath<contract> : (!fir.vector<2:f64>, !fir.vector<2:f64>, !fir.vector<2:f64>) -> !fir.vector<2:f64>
! CHECK-FIR: fir.store %[[vmsum]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>

! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xf64>>
! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr<vector<2xf64>>
! CHECK-LLVMIR: %[[vmsum:.*]] = llvm.call @llvm.fma.v2f64(%[[x]], %[[y]], %[[z]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xf64>, vector<2xf64>, vector<2xf64>) -> vector<2xf64>
! CHECK-LLVMIR: llvm.store %[[vmsum]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>

! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
! CHECK: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
! CHECK: %[[z:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
! CHECK: %[[vmsum:.*]] = call contract <2 x double> @llvm.fma.v2f64(<2 x double> %[[x]], <2 x double> %[[y]], <2 x double> %[[z]])
! CHECK: store <2 x double> %[[vmsum]], ptr %{{[0-9]}}, align 16
end subroutine vec_madd_testf64

! vec_nmsub

! Checks lowering of vec_nmsub on vector(real(4)) to @llvm.ppc.fnmsub.v4f32.
! CHECK-LABEL: vec_nmsub_testf32
subroutine vec_nmsub_testf32(x, y, z)
  vector(real(4)) :: vnmsub, x, y, z
  vnmsub = vec_nmsub(x, y, z)
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<4:f32>>
! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref<!fir.vector<4:f32>>
! CHECK-FIR: %[[nmsub:.*]] = fir.call @llvm.ppc.fnmsub.v4f32(%[[x]], %[[y]], %[[z]]) fastmath<contract> : (!fir.vector<4:f32>, !fir.vector<4:f32>, !fir.vector<4:f32>) -> !fir.vector<4:f32>
! CHECK-FIR: fir.store %[[nmsub]] to %{{[0-9]}} : !fir.ref<!fir.vector<4:f32>>

! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xf32>>
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<4xf32>>
! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr<vector<4xf32>>
! CHECK-LLVMIR: %[[vnmsub:.*]] = llvm.call @llvm.ppc.fnmsub.v4f32(%[[x]], %[[y]], %[[z]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32>
! CHECK-LLVMIR: llvm.store %[[vnmsub]], %{{[0-9]}} : !llvm.ptr<vector<4xf32>>

! CHECK: %[[x:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
! CHECK: %[[y:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
! CHECK: %[[z:.*]] = load <4 x float>, ptr %{{[0-9]}}, align 16
! CHECK: %[[vnmsub:.*]] = call contract <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %[[x]], <4 x float> %[[y]], <4 x float> %[[z]])
! CHECK: store <4 x float> %[[vnmsub]], ptr %{{[0-9]}}, align 16
end subroutine vec_nmsub_testf32

! Checks lowering of vec_nmsub on vector(real(8)) to @llvm.ppc.fnmsub.v2f64.
! CHECK-LABEL: vec_nmsub_testf64
subroutine vec_nmsub_testf64(x, y, z)
  vector(real(8)) :: vnmsub, x, y, z
  vnmsub = vec_nmsub(x, y, z)
! CHECK-FIR: %[[x:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<2:f64>>
! CHECK-FIR: %[[y:.*]] = fir.load %arg1 : !fir.ref<!fir.vector<2:f64>>
! CHECK-FIR: %[[z:.*]] = fir.load %arg2 : !fir.ref<!fir.vector<2:f64>>
! CHECK-FIR: %[[nmsub:.*]] = fir.call @llvm.ppc.fnmsub.v2f64(%[[x]], %[[y]], %[[z]]) fastmath<contract> : (!fir.vector<2:f64>, !fir.vector<2:f64>, !fir.vector<2:f64>) -> !fir.vector<2:f64>
! CHECK-FIR: fir.store %[[nmsub]] to %{{[0-9]}} : !fir.ref<!fir.vector<2:f64>>

! CHECK-LLVMIR: %[[x:.*]] = llvm.load %arg0 : !llvm.ptr<vector<2xf64>>
! CHECK-LLVMIR: %[[y:.*]] = llvm.load %arg1 : !llvm.ptr<vector<2xf64>>
! CHECK-LLVMIR: %[[z:.*]] = llvm.load %arg2 : !llvm.ptr<vector<2xf64>>
! CHECK-LLVMIR: %[[vnmsub:.*]] = llvm.call @llvm.ppc.fnmsub.v2f64(%[[x]], %[[y]], %[[z]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<2xf64>, vector<2xf64>, vector<2xf64>) -> vector<2xf64>
! CHECK-LLVMIR: llvm.store %[[vnmsub]], %{{[0-9]}} : !llvm.ptr<vector<2xf64>>

! CHECK: %[[x:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
! CHECK: %[[y:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
! CHECK: %[[z:.*]] = load <2 x double>, ptr %{{[0-9]}}, align 16
! CHECK: %[[vnmsub:.*]] = call contract <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %[[x]], <2 x double> %[[y]], <2 x double> %[[z]])
! CHECK: store <2 x double> %[[vnmsub]], ptr %{{[0-9]}}, align 16
end subroutine vec_nmsub_testf64