[InstCombine] fold fake vector extract to shift+trunc
We already handle more complicated cases like: extelt (bitcast (inselt poison, X, 0)) --> trunc (lshr X). But we missed this simpler pattern (see https://alive2.llvm.org/ce/z/D55h64 / https://alive2.llvm.org/ce/z/GKzzRq ). This is part of solving: https://llvm.org/PR52057 . I made the transform depend on a legal/desirable int type to avoid creating a shift of an illegal type (for example, i128). I'm not sure if that restriction is actually necessary, but we can change that as a follow-up if the backend can deal with integer ops on too-wide illegal types. The pile of AVX512 test changes is neutral AFAICT - the x86 backend seems to know how to turn the new shift+trunc (or mask+compare) form into the expected "kmov" instructions. Differential Revision: https://reviews.llvm.org/D111082
This commit is contained in:
@@ -317,6 +317,7 @@ private:
|
||||
|
||||
Value *EmitGEPOffset(User *GEP);
|
||||
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
|
||||
Instruction *foldBitcastExtElt(ExtractElementInst &ExtElt);
|
||||
Instruction *foldCastedBitwiseLogic(BinaryOperator &I);
|
||||
Instruction *narrowBinOp(TruncInst &Trunc);
|
||||
Instruction *narrowMaskedBinOp(BinaryOperator &And);
|
||||
|
||||
@@ -178,24 +178,46 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
|
||||
return &EI;
|
||||
}
|
||||
|
||||
static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
|
||||
InstCombiner::BuilderTy &Builder,
|
||||
bool IsBigEndian) {
|
||||
Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
|
||||
Value *X;
|
||||
uint64_t ExtIndexC;
|
||||
if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
|
||||
!X->getType()->isVectorTy() ||
|
||||
!match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
|
||||
return nullptr;
|
||||
|
||||
ElementCount NumElts =
|
||||
cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
|
||||
Type *DestTy = Ext.getType();
|
||||
bool IsBigEndian = DL.isBigEndian();
|
||||
|
||||
// If we are casting an integer to vector and extracting a portion, that is
|
||||
// a shift-right and truncate.
|
||||
// TODO: If no shift is needed, allow extra use?
|
||||
// TODO: Allow FP dest type by casting the trunc to FP?
|
||||
if (X->getType()->isIntegerTy() && DestTy->isIntegerTy() &&
|
||||
isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) &&
|
||||
Ext.getVectorOperand()->hasOneUse()) {
|
||||
assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
|
||||
"Expected fixed vector type for bitcast from scalar integer");
|
||||
|
||||
// Big endian requires adjusting the extract index since MSB is at index 0.
|
||||
// LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
|
||||
// BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
|
||||
if (IsBigEndian)
|
||||
ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
|
||||
unsigned ShiftAmountC = ExtIndexC * DestTy->getPrimitiveSizeInBits();
|
||||
Value *Lshr = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
|
||||
return new TruncInst(Lshr, DestTy);
|
||||
}
|
||||
|
||||
if (!X->getType()->isVectorTy())
|
||||
return nullptr;
|
||||
|
||||
// If this extractelement is using a bitcast from a vector of the same number
|
||||
// of elements, see if we can find the source element from the source vector:
|
||||
// extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
|
||||
auto *SrcTy = cast<VectorType>(X->getType());
|
||||
Type *DestTy = Ext.getType();
|
||||
ElementCount NumSrcElts = SrcTy->getElementCount();
|
||||
ElementCount NumElts =
|
||||
cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
|
||||
if (NumSrcElts == NumElts)
|
||||
if (Value *Elt = findScalarElement(X, ExtIndexC))
|
||||
return new BitCastInst(Elt, DestTy);
|
||||
@@ -410,7 +432,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
|
||||
}
|
||||
}
|
||||
|
||||
if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
|
||||
if (Instruction *I = foldBitcastExtElt(EI))
|
||||
return I;
|
||||
|
||||
// If there's a vector PHI feeding a scalar use through this extractelement
|
||||
|
||||
@@ -36,12 +36,12 @@ define <4 x float> @test_add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -109,12 +109,12 @@ define <2 x double> @test_add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
|
||||
@@ -178,12 +178,12 @@ define <4 x float> @test_sub_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -251,12 +251,12 @@ define <2 x double> @test_sub_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
|
||||
@@ -320,12 +320,12 @@ define <4 x float> @test_mul_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -393,12 +393,12 @@ define <2 x double> @test_mul_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
|
||||
@@ -462,12 +462,12 @@ define <4 x float> @test_div_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -535,12 +535,12 @@ define <2 x double> @test_div_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
|
||||
@@ -924,11 +924,11 @@ define <4 x float> @test_mask_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x flo
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP1]], float [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -953,10 +953,10 @@ define float @test_mask_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP1]]
|
||||
; CHECK-NEXT: ret float [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP1]], float [[TMP4]]
|
||||
; CHECK-NEXT: ret float [[TMP6]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1000,11 +1000,11 @@ define <2 x double> @test_mask_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP1]], double [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
|
||||
%2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
|
||||
@@ -1025,10 +1025,10 @@ define double @test_mask_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x doub
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP1]]
|
||||
; CHECK-NEXT: ret double [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP1]], double [[TMP4]]
|
||||
; CHECK-NEXT: ret double [[TMP6]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = extractelement <2 x double> %1, i64 0
|
||||
@@ -1066,11 +1066,11 @@ define <4 x float> @test_maskz_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x fl
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float 0.000000e+00
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float 0.000000e+00, float [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1095,10 +1095,10 @@ define float @test_maskz_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float 0.000000e+00
|
||||
; CHECK-NEXT: ret float [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float 0.000000e+00, float [[TMP4]]
|
||||
; CHECK-NEXT: ret float [[TMP6]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1140,11 +1140,11 @@ define <2 x double> @test_maskz_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double 0.000000e+00
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double 0.000000e+00, double [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
|
||||
%2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
|
||||
@@ -1165,10 +1165,10 @@ define double @test_maskz_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x dou
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double 0.000000e+00
|
||||
; CHECK-NEXT: ret double [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double 0.000000e+00, double [[TMP4]]
|
||||
; CHECK-NEXT: ret double [[TMP6]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = extractelement <2 x double> %1, i64 0
|
||||
@@ -1206,11 +1206,11 @@ define <4 x float> @test_mask3_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x fl
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[C]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP3]], float [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[C]], float [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1235,10 +1235,10 @@ define float @test_mask3_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP3]]
|
||||
; CHECK-NEXT: ret float [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP3]], float [[TMP4]]
|
||||
; CHECK-NEXT: ret float [[TMP6]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1280,11 +1280,11 @@ define <2 x double> @test_mask3_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[C]], double [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP3]], double [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[C]], double [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
|
||||
@@ -1305,10 +1305,10 @@ define double @test_mask3_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x dou
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP3]]
|
||||
; CHECK-NEXT: ret double [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP3]], double [[TMP4]]
|
||||
; CHECK-NEXT: ret double [[TMP6]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
|
||||
%2 = extractelement <2 x double> %a, i64 0
|
||||
@@ -1348,11 +1348,11 @@ define <4 x float> @test_mask3_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x fl
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]])
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[C]], float [[TMP9]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], float [[TMP6]], float [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[C]], float [[TMP8]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP9]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1381,10 +1381,10 @@ define float @test_mask3_vfmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]])
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]]
|
||||
; CHECK-NEXT: ret float [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], float [[TMP6]], float [[TMP5]]
|
||||
; CHECK-NEXT: ret float [[TMP8]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1453,11 +1453,11 @@ define <2 x double> @test_mask3_vfmsub_sd(<2 x double> %a, <2 x double> %b, <2 x
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fneg double [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]])
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[C]], double [[TMP9]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], double [[TMP6]], double [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[C]], double [[TMP8]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP9]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
|
||||
@@ -1482,10 +1482,10 @@ define double @test_mask3_vfmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x dou
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fneg double [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]])
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]]
|
||||
; CHECK-NEXT: ret double [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], double [[TMP6]], double [[TMP5]]
|
||||
; CHECK-NEXT: ret double [[TMP8]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
|
||||
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
|
||||
@@ -1549,11 +1549,11 @@ define <4 x float> @test_mask3_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x f
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fneg float [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]])
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i64 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[C]], float [[TMP10]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP11]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], float [[TMP7]], float [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[C]], float [[TMP9]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP10]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1584,10 +1584,10 @@ define float @test_mask3_vfnmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fneg float [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]])
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i32 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]]
|
||||
; CHECK-NEXT: ret float [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], float [[TMP7]], float [[TMP6]]
|
||||
; CHECK-NEXT: ret float [[TMP9]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1660,11 +1660,11 @@ define <2 x double> @test_mask3_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fneg double [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]])
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[C]], double [[TMP10]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP11]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], double [[TMP7]], double [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[C]], double [[TMP9]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP10]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
|
||||
@@ -1691,10 +1691,10 @@ define double @test_mask3_vfnmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x do
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fneg double [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]])
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]]
|
||||
; CHECK-NEXT: ret double [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], double [[TMP7]], double [[TMP6]]
|
||||
; CHECK-NEXT: ret double [[TMP9]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
|
||||
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
|
||||
|
||||
@@ -36,12 +36,12 @@ define <4 x float> @test_add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -109,12 +109,12 @@ define <2 x double> @test_add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
|
||||
@@ -178,12 +178,12 @@ define <4 x float> @test_sub_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -251,12 +251,12 @@ define <2 x double> @test_sub_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
|
||||
@@ -320,12 +320,12 @@ define <4 x float> @test_mul_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -393,12 +393,12 @@ define <2 x double> @test_mul_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
|
||||
@@ -462,12 +462,12 @@ define <4 x float> @test_div_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -535,12 +535,12 @@ define <2 x double> @test_div_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
|
||||
@@ -924,11 +924,11 @@ define <4 x float> @test_mask_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x flo
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP1]], float [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -953,10 +953,10 @@ define float @test_mask_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP1]]
|
||||
; CHECK-NEXT: ret float [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP1]], float [[TMP4]]
|
||||
; CHECK-NEXT: ret float [[TMP6]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1000,11 +1000,11 @@ define <2 x double> @test_mask_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP1]], double [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
|
||||
%2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
|
||||
@@ -1025,10 +1025,10 @@ define double @test_mask_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x doub
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP1]]
|
||||
; CHECK-NEXT: ret double [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP1]], double [[TMP4]]
|
||||
; CHECK-NEXT: ret double [[TMP6]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = extractelement <2 x double> %1, i64 0
|
||||
@@ -1066,11 +1066,11 @@ define <4 x float> @test_maskz_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x fl
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float 0.000000e+00
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float 0.000000e+00, float [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1095,10 +1095,10 @@ define float @test_maskz_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float 0.000000e+00
|
||||
; CHECK-NEXT: ret float [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float 0.000000e+00, float [[TMP4]]
|
||||
; CHECK-NEXT: ret float [[TMP6]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1140,11 +1140,11 @@ define <2 x double> @test_maskz_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double 0.000000e+00
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double 0.000000e+00, double [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
|
||||
%2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
|
||||
@@ -1165,10 +1165,10 @@ define double @test_maskz_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x dou
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double 0.000000e+00
|
||||
; CHECK-NEXT: ret double [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double 0.000000e+00, double [[TMP4]]
|
||||
; CHECK-NEXT: ret double [[TMP6]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = extractelement <2 x double> %1, i64 0
|
||||
@@ -1206,11 +1206,11 @@ define <4 x float> @test_mask3_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x fl
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[C]], float [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP3]], float [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[C]], float [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1235,10 +1235,10 @@ define float @test_mask3_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP3]]
|
||||
; CHECK-NEXT: ret float [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP3]], float [[TMP4]]
|
||||
; CHECK-NEXT: ret float [[TMP6]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1280,11 +1280,11 @@ define <2 x double> @test_mask3_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[C]], double [[TMP7]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP3]], double [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[C]], double [[TMP6]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP7]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
|
||||
@@ -1305,10 +1305,10 @@ define double @test_mask3_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x dou
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP3]]
|
||||
; CHECK-NEXT: ret double [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP3]], double [[TMP4]]
|
||||
; CHECK-NEXT: ret double [[TMP6]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
|
||||
%2 = extractelement <2 x double> %a, i64 0
|
||||
@@ -1348,11 +1348,11 @@ define <4 x float> @test_mask3_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x fl
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]])
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[C]], float [[TMP9]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], float [[TMP6]], float [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[C]], float [[TMP8]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP9]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1381,10 +1381,10 @@ define float @test_mask3_vfmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]])
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]]
|
||||
; CHECK-NEXT: ret float [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], float [[TMP6]], float [[TMP5]]
|
||||
; CHECK-NEXT: ret float [[TMP8]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1453,11 +1453,11 @@ define <2 x double> @test_mask3_vfmsub_sd(<2 x double> %a, <2 x double> %b, <2 x
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fneg double [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]])
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[C]], double [[TMP9]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], double [[TMP6]], double [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[C]], double [[TMP8]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP9]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
|
||||
@@ -1482,10 +1482,10 @@ define double @test_mask3_vfmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x dou
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fneg double [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]])
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]]
|
||||
; CHECK-NEXT: ret double [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], double [[TMP6]], double [[TMP5]]
|
||||
; CHECK-NEXT: ret double [[TMP8]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
|
||||
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
|
||||
@@ -1549,11 +1549,11 @@ define <4 x float> @test_mask3_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x f
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fneg float [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]])
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i64 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[C]], float [[TMP10]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP11]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], float [[TMP7]], float [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[C]], float [[TMP9]], i64 0
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP10]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1584,10 +1584,10 @@ define float @test_mask3_vfnmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fneg float [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]])
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i32 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]]
|
||||
; CHECK-NEXT: ret float [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], float [[TMP7]], float [[TMP6]]
|
||||
; CHECK-NEXT: ret float [[TMP9]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
@@ -1660,11 +1660,11 @@ define <2 x double> @test_mask3_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fneg double [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]])
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[C]], double [[TMP10]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP11]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], double [[TMP7]], double [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[C]], double [[TMP9]], i64 0
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP10]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
|
||||
@@ -1691,10 +1691,10 @@ define double @test_mask3_vfnmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x do
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fneg double [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]])
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]]
|
||||
; CHECK-NEXT: ret double [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], double [[TMP7]], double [[TMP6]]
|
||||
; CHECK-NEXT: ret double [[TMP9]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
|
||||
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
|
||||
|
||||
@@ -330,11 +330,17 @@ define <4 x double> @invalid_extractelement(<2 x double> %a, <4 x double> %b, do
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
; i32 is a desirable/supported type independent of data layout.
|
||||
|
||||
define i8 @bitcast_scalar_supported_type_index0(i32 %x) {
|
||||
; ANY-LABEL: @bitcast_scalar_supported_type_index0(
|
||||
; ANY-NEXT: [[V:%.*]] = bitcast i32 [[X:%.*]] to <4 x i8>
|
||||
; ANY-NEXT: [[R:%.*]] = extractelement <4 x i8> [[V]], i8 0
|
||||
; ANY-NEXT: ret i8 [[R]]
|
||||
; LE-LABEL: @bitcast_scalar_supported_type_index0(
|
||||
; LE-NEXT: [[R:%.*]] = trunc i32 [[X:%.*]] to i8
|
||||
; LE-NEXT: ret i8 [[R]]
|
||||
;
|
||||
; BE-LABEL: @bitcast_scalar_supported_type_index0(
|
||||
; BE-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i32 [[X:%.*]], 24
|
||||
; BE-NEXT: [[R:%.*]] = trunc i32 [[EXTELT_OFFSET]] to i8
|
||||
; BE-NEXT: ret i8 [[R]]
|
||||
;
|
||||
%v = bitcast i32 %x to <4 x i8>
|
||||
%r = extractelement <4 x i8> %v, i8 0
|
||||
@@ -342,27 +348,41 @@ define i8 @bitcast_scalar_supported_type_index0(i32 %x) {
|
||||
}
|
||||
|
||||
define i8 @bitcast_scalar_supported_type_index2(i32 %x) {
|
||||
; ANY-LABEL: @bitcast_scalar_supported_type_index2(
|
||||
; ANY-NEXT: [[V:%.*]] = bitcast i32 [[X:%.*]] to <4 x i8>
|
||||
; ANY-NEXT: [[R:%.*]] = extractelement <4 x i8> [[V]], i64 2
|
||||
; ANY-NEXT: ret i8 [[R]]
|
||||
; LE-LABEL: @bitcast_scalar_supported_type_index2(
|
||||
; LE-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i32 [[X:%.*]], 16
|
||||
; LE-NEXT: [[R:%.*]] = trunc i32 [[EXTELT_OFFSET]] to i8
|
||||
; LE-NEXT: ret i8 [[R]]
|
||||
;
|
||||
; BE-LABEL: @bitcast_scalar_supported_type_index2(
|
||||
; BE-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i32 [[X:%.*]], 8
|
||||
; BE-NEXT: [[R:%.*]] = trunc i32 [[EXTELT_OFFSET]] to i8
|
||||
; BE-NEXT: ret i8 [[R]]
|
||||
;
|
||||
%v = bitcast i32 %x to <4 x i8>
|
||||
%r = extractelement <4 x i8> %v, i64 2
|
||||
ret i8 %r
|
||||
}
|
||||
|
||||
; i64 is legal based on data layout.
|
||||
|
||||
define i4 @bitcast_scalar_legal_type_index3(i64 %x) {
|
||||
; ANY-LABEL: @bitcast_scalar_legal_type_index3(
|
||||
; ANY-NEXT: [[V:%.*]] = bitcast i64 [[X:%.*]] to <16 x i4>
|
||||
; ANY-NEXT: [[R:%.*]] = extractelement <16 x i4> [[V]], i64 3
|
||||
; ANY-NEXT: ret i4 [[R]]
|
||||
; LE-LABEL: @bitcast_scalar_legal_type_index3(
|
||||
; LE-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i64 [[X:%.*]], 12
|
||||
; LE-NEXT: [[R:%.*]] = trunc i64 [[EXTELT_OFFSET]] to i4
|
||||
; LE-NEXT: ret i4 [[R]]
|
||||
;
|
||||
; BE-LABEL: @bitcast_scalar_legal_type_index3(
|
||||
; BE-NEXT: [[EXTELT_OFFSET:%.*]] = lshr i64 [[X:%.*]], 48
|
||||
; BE-NEXT: [[R:%.*]] = trunc i64 [[EXTELT_OFFSET]] to i4
|
||||
; BE-NEXT: ret i4 [[R]]
|
||||
;
|
||||
%v = bitcast i64 %x to <16 x i4>
|
||||
%r = extractelement <16 x i4> %v, i64 3
|
||||
ret i4 %r
|
||||
}
|
||||
|
||||
; negative test - don't create a shift for an illegal type.
|
||||
|
||||
define i8 @bitcast_scalar_illegal_type_index1(i128 %x) {
|
||||
; ANY-LABEL: @bitcast_scalar_illegal_type_index1(
|
||||
; ANY-NEXT: [[V:%.*]] = bitcast i128 [[X:%.*]] to <16 x i8>
|
||||
@@ -374,6 +394,8 @@ define i8 @bitcast_scalar_illegal_type_index1(i128 %x) {
|
||||
ret i8 %r
|
||||
}
|
||||
|
||||
; negative test - can't use shift/trunc on FP
|
||||
|
||||
define i8 @bitcast_fp_index0(float %x) {
|
||||
; ANY-LABEL: @bitcast_fp_index0(
|
||||
; ANY-NEXT: [[V:%.*]] = bitcast float [[X:%.*]] to <4 x i8>
|
||||
@@ -385,6 +407,8 @@ define i8 @bitcast_fp_index0(float %x) {
|
||||
ret i8 %r
|
||||
}
|
||||
|
||||
; negative test - can't have FP dest type without a cast
|
||||
|
||||
define half @bitcast_fpvec_index0(i32 %x) {
|
||||
; ANY-LABEL: @bitcast_fpvec_index0(
|
||||
; ANY-NEXT: [[V:%.*]] = bitcast i32 [[X:%.*]] to <2 x half>
|
||||
@@ -396,6 +420,8 @@ define half @bitcast_fpvec_index0(i32 %x) {
|
||||
ret half %r
|
||||
}
|
||||
|
||||
; negative test - need constant index
|
||||
|
||||
define i8 @bitcast_scalar_index_variable(i32 %x, i64 %y) {
|
||||
; ANY-LABEL: @bitcast_scalar_index_variable(
|
||||
; ANY-NEXT: [[V:%.*]] = bitcast i32 [[X:%.*]] to <4 x i8>
|
||||
@@ -407,6 +433,8 @@ define i8 @bitcast_scalar_index_variable(i32 %x, i64 %y) {
|
||||
ret i8 %r
|
||||
}
|
||||
|
||||
; negative test - no extra uses
|
||||
|
||||
define i8 @bitcast_scalar_index0_use(i64 %x) {
|
||||
; ANY-LABEL: @bitcast_scalar_index0_use(
|
||||
; ANY-NEXT: [[V:%.*]] = bitcast i64 [[X:%.*]] to <8 x i8>
|
||||
|
||||
Reference in New Issue
Block a user