[VPlan] Unroll VPReplicateRecipe by VF. (#142433)
Explicitly unroll VPReplicateRecipes outside replicate regions by VF, replacing them by VF single-scalar recipes. Extracts for operands are added as needed and the scalar results are combined to a vector using a new BuildVector VPInstruction. It also adds a few folds to simplify unnecessary extracts/BuildVectors. It also adds a BuildStructVector opcode for handling of calls that have struct return types. VPReplicateRecipes in replicate regions will be unrolled as a follow-up, turning non-single-scalar VPReplicateRecipes into 'abstract' (i.e. not executable) recipes. PR: https://github.com/llvm/llvm-project/pull/142433
This commit is contained in:
@@ -7328,6 +7328,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
|
||||
// cost model is complete for better cost estimates.
|
||||
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,
|
||||
OrigLoop->getHeader()->getContext());
|
||||
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
|
||||
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
|
||||
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
|
||||
VPlanTransforms::runPass(VPlanTransforms::addBranchWeightToMiddleTerminator,
|
||||
|
||||
@@ -261,6 +261,13 @@ Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) {
|
||||
return Data.VPV2Scalars[Def][0];
|
||||
}
|
||||
|
||||
// Look through BuildVector to avoid redundant extracts.
|
||||
// TODO: Remove once replicate regions are unrolled explicitly.
|
||||
if (Lane.getKind() == VPLane::Kind::First && match(Def, m_BuildVector())) {
|
||||
auto *BuildVector = cast<VPInstruction>(Def);
|
||||
return get(BuildVector->getOperand(Lane.getKnownLane()), true);
|
||||
}
|
||||
|
||||
assert(hasVectorValue(Def));
|
||||
auto *VecPart = Data.VPV2Vector[Def];
|
||||
if (!VecPart->getType()->isVectorTy()) {
|
||||
|
||||
@@ -936,6 +936,13 @@ public:
|
||||
BranchOnCount,
|
||||
BranchOnCond,
|
||||
Broadcast,
|
||||
/// Given operands of (the same) struct type, creates a struct of fixed-
|
||||
/// width vectors each containing a struct field of all operands. The
|
||||
/// number of operands matches the element count of every vector.
|
||||
BuildStructVector,
|
||||
/// Creates a fixed-width vector containing all operands. The number of
|
||||
/// operands matches the vector element count.
|
||||
BuildVector,
|
||||
ComputeAnyOfResult,
|
||||
ComputeFindLastIVResult,
|
||||
ComputeReductionResult,
|
||||
|
||||
@@ -108,6 +108,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
|
||||
case VPInstruction::CalculateTripCountMinusVF:
|
||||
case VPInstruction::CanonicalIVIncrementForPart:
|
||||
case VPInstruction::AnyOf:
|
||||
case VPInstruction::BuildStructVector:
|
||||
case VPInstruction::BuildVector:
|
||||
return SetResultTyFromOp();
|
||||
case VPInstruction::FirstActiveLane:
|
||||
return Type::getIntNTy(Ctx, 64);
|
||||
|
||||
@@ -221,6 +221,13 @@ struct Recipe_match {
|
||||
}
|
||||
|
||||
bool match(const VPRecipeBase *R) const {
|
||||
if (std::tuple_size<Ops_t>::value == 0) {
|
||||
assert(Opcode == VPInstruction::BuildVector &&
|
||||
"can only match BuildVector with empty ops");
|
||||
auto *VPI = dyn_cast<VPInstruction>(R);
|
||||
return VPI && VPI->getOpcode() == VPInstruction::BuildVector;
|
||||
}
|
||||
|
||||
if ((!matchRecipeAndOpcode<RecipeTys>(R) && ...))
|
||||
return false;
|
||||
|
||||
@@ -263,6 +270,10 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
template <unsigned Opcode, typename... RecipeTys>
|
||||
using ZeroOpRecipe_match =
|
||||
Recipe_match<std::tuple<>, Opcode, false, RecipeTys...>;
|
||||
|
||||
template <typename Op0_t, unsigned Opcode, typename... RecipeTys>
|
||||
using UnaryRecipe_match =
|
||||
Recipe_match<std::tuple<Op0_t>, Opcode, false, RecipeTys...>;
|
||||
@@ -271,6 +282,9 @@ template <typename Op0_t, unsigned Opcode>
|
||||
using UnaryVPInstruction_match =
|
||||
UnaryRecipe_match<Op0_t, Opcode, VPInstruction>;
|
||||
|
||||
template <unsigned Opcode>
|
||||
using ZeroOpVPInstruction_match = ZeroOpRecipe_match<Opcode, VPInstruction>;
|
||||
|
||||
template <typename Op0_t, unsigned Opcode>
|
||||
using AllUnaryRecipe_match =
|
||||
UnaryRecipe_match<Op0_t, Opcode, VPWidenRecipe, VPReplicateRecipe,
|
||||
@@ -302,6 +316,12 @@ using AllBinaryRecipe_match =
|
||||
BinaryRecipe_match<Op0_t, Op1_t, Opcode, Commutative, VPWidenRecipe,
|
||||
VPReplicateRecipe, VPWidenCastRecipe, VPInstruction>;
|
||||
|
||||
/// BuildVector matches only its opcode, w/o matching its operands as the
|
||||
/// number of operands is not fixed.
|
||||
inline ZeroOpVPInstruction_match<VPInstruction::BuildVector> m_BuildVector() {
|
||||
return ZeroOpVPInstruction_match<VPInstruction::BuildVector>();
|
||||
}
|
||||
|
||||
template <unsigned Opcode, typename Op0_t>
|
||||
inline UnaryVPInstruction_match<Op0_t, Opcode>
|
||||
m_VPInstruction(const Op0_t &Op0) {
|
||||
|
||||
@@ -551,6 +551,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
|
||||
}
|
||||
case Instruction::ExtractElement: {
|
||||
assert(State.VF.isVector() && "Only extract elements from vectors");
|
||||
if (getOperand(1)->isLiveIn()) {
|
||||
unsigned IdxToExtract =
|
||||
cast<ConstantInt>(getOperand(1)->getLiveInIRValue())->getZExtValue();
|
||||
return State.get(getOperand(0), VPLane(IdxToExtract));
|
||||
}
|
||||
Value *Vec = State.get(getOperand(0));
|
||||
Value *Idx = State.get(getOperand(1), /*IsScalar=*/true);
|
||||
return Builder.CreateExtractElement(Vec, Idx, Name);
|
||||
@@ -664,6 +669,34 @@ Value *VPInstruction::generate(VPTransformState &State) {
|
||||
return Builder.CreateVectorSplat(
|
||||
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
|
||||
}
|
||||
case VPInstruction::BuildStructVector: {
|
||||
// For struct types, we need to build a new 'wide' struct type, where each
|
||||
// element is widened, i.e., we create a struct of vectors.
|
||||
auto *StructTy =
|
||||
cast<StructType>(State.TypeAnalysis.inferScalarType(getOperand(0)));
|
||||
Value *Res = PoisonValue::get(toVectorizedTy(StructTy, State.VF));
|
||||
for (const auto &[LaneIndex, Op] : enumerate(operands())) {
|
||||
for (unsigned FieldIndex = 0; FieldIndex != StructTy->getNumElements();
|
||||
FieldIndex++) {
|
||||
Value *ScalarValue =
|
||||
Builder.CreateExtractValue(State.get(Op, true), FieldIndex);
|
||||
Value *VectorValue = Builder.CreateExtractValue(Res, FieldIndex);
|
||||
VectorValue =
|
||||
Builder.CreateInsertElement(VectorValue, ScalarValue, LaneIndex);
|
||||
Res = Builder.CreateInsertValue(Res, VectorValue, FieldIndex);
|
||||
}
|
||||
}
|
||||
return Res;
|
||||
}
|
||||
case VPInstruction::BuildVector: {
|
||||
auto *ScalarTy = State.TypeAnalysis.inferScalarType(getOperand(0));
|
||||
auto NumOfElements = ElementCount::getFixed(getNumOperands());
|
||||
Value *Res = PoisonValue::get(toVectorizedTy(ScalarTy, NumOfElements));
|
||||
for (const auto &[Idx, Op] : enumerate(operands()))
|
||||
Res = State.Builder.CreateInsertElement(Res, State.get(Op, true),
|
||||
State.Builder.getInt32(Idx));
|
||||
return Res;
|
||||
}
|
||||
case VPInstruction::ReductionStartVector: {
|
||||
if (State.VF.isScalar())
|
||||
return State.get(getOperand(0), true);
|
||||
@@ -953,10 +986,11 @@ void VPInstruction::execute(VPTransformState &State) {
|
||||
if (!hasResult())
|
||||
return;
|
||||
assert(GeneratedValue && "generate must produce a value");
|
||||
assert(
|
||||
(GeneratedValue->getType()->isVectorTy() == !GeneratesPerFirstLaneOnly ||
|
||||
State.VF.isScalar()) &&
|
||||
"scalar value but not only first lane defined");
|
||||
assert((((GeneratedValue->getType()->isVectorTy() ||
|
||||
GeneratedValue->getType()->isStructTy()) ==
|
||||
!GeneratesPerFirstLaneOnly) ||
|
||||
State.VF.isScalar()) &&
|
||||
"scalar value but not only first lane defined");
|
||||
State.set(this, GeneratedValue,
|
||||
/*IsScalar*/ GeneratesPerFirstLaneOnly);
|
||||
}
|
||||
@@ -970,6 +1004,8 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
|
||||
case Instruction::ICmp:
|
||||
case Instruction::Select:
|
||||
case VPInstruction::AnyOf:
|
||||
case VPInstruction::BuildStructVector:
|
||||
case VPInstruction::BuildVector:
|
||||
case VPInstruction::CalculateTripCountMinusVF:
|
||||
case VPInstruction::CanonicalIVIncrementForPart:
|
||||
case VPInstruction::ExtractLastElement:
|
||||
@@ -1092,6 +1128,12 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
|
||||
case VPInstruction::Broadcast:
|
||||
O << "broadcast";
|
||||
break;
|
||||
case VPInstruction::BuildStructVector:
|
||||
O << "buildstructvector";
|
||||
break;
|
||||
case VPInstruction::BuildVector:
|
||||
O << "buildvector";
|
||||
break;
|
||||
case VPInstruction::ExtractLastElement:
|
||||
O << "extract-last-element";
|
||||
break;
|
||||
@@ -2686,45 +2728,27 @@ static void scalarizeInstruction(const Instruction *Instr,
|
||||
|
||||
void VPReplicateRecipe::execute(VPTransformState &State) {
|
||||
Instruction *UI = getUnderlyingInstr();
|
||||
if (State.Lane) { // Generate a single instance.
|
||||
assert((State.VF.isScalar() || !isSingleScalar()) &&
|
||||
"uniform recipe shouldn't be predicated");
|
||||
assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
|
||||
scalarizeInstruction(UI, this, *State.Lane, State);
|
||||
// Insert scalar instance packing it into a vector.
|
||||
if (State.VF.isVector() && shouldPack()) {
|
||||
Value *WideValue;
|
||||
// If we're constructing lane 0, initialize to start from poison.
|
||||
if (State.Lane->isFirstLane()) {
|
||||
assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
|
||||
WideValue = PoisonValue::get(VectorType::get(UI->getType(), State.VF));
|
||||
} else {
|
||||
WideValue = State.get(this);
|
||||
}
|
||||
State.set(this, State.packScalarIntoVectorizedValue(this, WideValue,
|
||||
*State.Lane));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (IsSingleScalar) {
|
||||
// Uniform within VL means we need to generate lane 0.
|
||||
if (!State.Lane) {
|
||||
assert(IsSingleScalar && "VPReplicateRecipes outside replicate regions "
|
||||
"must have already been unrolled");
|
||||
scalarizeInstruction(UI, this, VPLane(0), State);
|
||||
return;
|
||||
}
|
||||
|
||||
// A store of a loop varying value to a uniform address only needs the last
|
||||
// copy of the store.
|
||||
if (isa<StoreInst>(UI) && vputils::isSingleScalar(getOperand(1))) {
|
||||
auto Lane = VPLane::getLastLaneForVF(State.VF);
|
||||
scalarizeInstruction(UI, this, VPLane(Lane), State);
|
||||
return;
|
||||
assert((State.VF.isScalar() || !isSingleScalar()) &&
|
||||
"uniform recipe shouldn't be predicated");
|
||||
assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
|
||||
scalarizeInstruction(UI, this, *State.Lane, State);
|
||||
// Insert scalar instance packing it into a vector.
|
||||
if (State.VF.isVector() && shouldPack()) {
|
||||
Value *WideValue =
|
||||
State.Lane->isFirstLane()
|
||||
? PoisonValue::get(VectorType::get(UI->getType(), State.VF))
|
||||
: State.get(this);
|
||||
State.set(this, State.packScalarIntoVectorizedValue(this, WideValue,
|
||||
*State.Lane));
|
||||
}
|
||||
|
||||
// Generate scalar instances for all VF lanes.
|
||||
const unsigned EndLane = State.VF.getFixedValue();
|
||||
for (unsigned Lane = 0; Lane < EndLane; ++Lane)
|
||||
scalarizeInstruction(UI, this, VPLane(Lane), State);
|
||||
}
|
||||
|
||||
bool VPReplicateRecipe::shouldPack() const {
|
||||
|
||||
@@ -1140,6 +1140,24 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Look through ExtractLastElement (BuildVector ....).
|
||||
if (match(&R, m_VPInstruction<VPInstruction::ExtractLastElement>(
|
||||
m_BuildVector()))) {
|
||||
auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
|
||||
Def->replaceAllUsesWith(
|
||||
BuildVector->getOperand(BuildVector->getNumOperands() - 1));
|
||||
return;
|
||||
}
|
||||
|
||||
// Look through ExtractPenultimateElement (BuildVector ....).
|
||||
if (match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
|
||||
m_BuildVector()))) {
|
||||
auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
|
||||
Def->replaceAllUsesWith(
|
||||
BuildVector->getOperand(BuildVector->getNumOperands() - 2));
|
||||
return;
|
||||
}
|
||||
|
||||
// Some simplifications can only be applied after unrolling. Perform them
|
||||
// below.
|
||||
if (!Plan->isUnrolled())
|
||||
|
||||
@@ -99,6 +99,12 @@ struct VPlanTransforms {
|
||||
/// Explicitly unroll \p Plan by \p UF.
|
||||
static void unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx);
|
||||
|
||||
/// Replace each VPReplicateRecipe outside of any replicate region in \p Plan
|
||||
/// with \p VF single-scalar recipes.
|
||||
/// TODO: Also replicate VPReplicateRecipes inside replicate regions, thereby
|
||||
/// dissolving the latter.
|
||||
static void replicateByVF(VPlan &Plan, ElementCount VF);
|
||||
|
||||
/// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
|
||||
/// resulting plan to \p BestVF and \p BestUF.
|
||||
static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "VPlan.h"
|
||||
#include "VPlanAnalysis.h"
|
||||
#include "VPlanCFG.h"
|
||||
#include "VPlanHelpers.h"
|
||||
#include "VPlanPatternMatch.h"
|
||||
#include "VPlanTransforms.h"
|
||||
#include "VPlanUtils.h"
|
||||
@@ -450,3 +451,87 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
|
||||
|
||||
VPlanTransforms::removeDeadRecipes(Plan);
|
||||
}
|
||||
|
||||
/// Create a single-scalar clone of \p RepR for lane \p Lane.
|
||||
static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
|
||||
Type *IdxTy, VPReplicateRecipe *RepR,
|
||||
VPLane Lane) {
|
||||
// Collect the operands at Lane, creating extracts as needed.
|
||||
SmallVector<VPValue *> NewOps;
|
||||
for (VPValue *Op : RepR->operands()) {
|
||||
if (vputils::isSingleScalar(Op)) {
|
||||
NewOps.push_back(Op);
|
||||
continue;
|
||||
}
|
||||
if (Lane.getKind() == VPLane::Kind::ScalableLast) {
|
||||
NewOps.push_back(
|
||||
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
|
||||
continue;
|
||||
}
|
||||
// Look through buildvector to avoid unnecessary extracts.
|
||||
if (match(Op, m_BuildVector())) {
|
||||
NewOps.push_back(
|
||||
cast<VPInstruction>(Op)->getOperand(Lane.getKnownLane()));
|
||||
continue;
|
||||
}
|
||||
VPValue *Idx =
|
||||
Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane()));
|
||||
VPValue *Ext = Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
|
||||
NewOps.push_back(Ext);
|
||||
}
|
||||
|
||||
auto *New =
|
||||
new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
|
||||
/*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
|
||||
New->insertBefore(RepR);
|
||||
return New;
|
||||
}
|
||||
|
||||
void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
|
||||
Type *IdxTy = IntegerType::get(
|
||||
Plan.getScalarHeader()->getIRBasicBlock()->getContext(), 32);
|
||||
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
|
||||
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
|
||||
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
|
||||
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
|
||||
if (!RepR || RepR->isSingleScalar())
|
||||
continue;
|
||||
|
||||
VPBuilder Builder(RepR);
|
||||
if (RepR->getNumUsers() == 0) {
|
||||
if (isa<StoreInst>(RepR->getUnderlyingInstr()) &&
|
||||
vputils::isSingleScalar(RepR->getOperand(1))) {
|
||||
// Stores to invariant addresses need to store the last lane only.
|
||||
cloneForLane(Plan, Builder, IdxTy, RepR,
|
||||
VPLane::getLastLaneForVF(VF));
|
||||
} else {
|
||||
// Create single-scalar version of RepR for all lanes.
|
||||
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
|
||||
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I));
|
||||
}
|
||||
RepR->eraseFromParent();
|
||||
continue;
|
||||
}
|
||||
/// Create single-scalar version of RepR for all lanes.
|
||||
SmallVector<VPValue *> LaneDefs;
|
||||
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
|
||||
LaneDefs.push_back(cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I)));
|
||||
|
||||
/// Users that only demand the first lane can use the definition for lane
|
||||
/// 0.
|
||||
RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
|
||||
return U.onlyFirstLaneUsed(RepR);
|
||||
});
|
||||
|
||||
// If needed, create a Build(Struct)Vector recipe to insert the scalar
|
||||
// lane values into a vector.
|
||||
Type *ResTy = RepR->getUnderlyingInstr()->getType();
|
||||
VPValue *VecRes = Builder.createNaryOp(
|
||||
ResTy->isStructTy() ? VPInstruction::BuildStructVector
|
||||
: VPInstruction::BuildVector,
|
||||
LaneDefs);
|
||||
RepR->replaceAllUsesWith(VecRes);
|
||||
RepR->eraseFromParent();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -393,12 +393,6 @@ define void @test_for_tried_to_force_scalar(ptr noalias %A, ptr noalias %B, ptr
|
||||
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x float> [[WIDE_VEC]], <12 x float> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x float> [[STRIDED_VEC]], i32 3
|
||||
; CHECK-NEXT: store float [[TMP30]], ptr [[C:%.*]], align 4
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x ptr> [[TMP29]], i32 0
|
||||
; CHECK-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP31]], align 4
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x ptr> [[TMP29]], i32 1
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = load float, ptr [[TMP33]], align 4
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x ptr> [[TMP29]], i32 2
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = load float, ptr [[TMP35]], align 4
|
||||
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x ptr> [[TMP29]], i32 3
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP37]], align 4
|
||||
; CHECK-NEXT: store float [[TMP36]], ptr [[B:%.*]], align 4
|
||||
|
||||
@@ -32,42 +32,31 @@ define ptr @test_interleave_ptradd_with_replicated_op(ptr %m) #0 {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 104
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 112
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 120
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: [[NEXT_GEP13:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP5]]
|
||||
; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP6]]
|
||||
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP7]]
|
||||
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP10]]
|
||||
; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP11]]
|
||||
; CHECK-NEXT: [[NEXT_GEP13:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP12]]
|
||||
; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP13]]
|
||||
; CHECK-NEXT: [[NEXT_GEP15:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP14]]
|
||||
; CHECK-NEXT: [[NEXT_GEP17:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP11]]
|
||||
; CHECK-NEXT: [[NEXT_GEP15:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP12]]
|
||||
; CHECK-NEXT: [[NEXT_GEP18:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP13]]
|
||||
; CHECK-NEXT: [[NEXT_GEP19:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP14]]
|
||||
; CHECK-NEXT: [[NEXT_GEP16:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 4
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 4
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP3]], i64 4
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[NEXT_GEP4]], i64 4
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[NEXT_GEP5]], i64 4
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[NEXT_GEP6]], i64 4
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[NEXT_GEP7]], i64 4
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[NEXT_GEP8]], i64 4
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[NEXT_GEP9]], i64 4
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[NEXT_GEP10]], i64 4
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[NEXT_GEP11]], i64 4
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[NEXT_GEP12]], i64 4
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[NEXT_GEP13]], i64 4
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[NEXT_GEP14]], i64 4
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[NEXT_GEP15]], i64 4
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[NEXT_GEP16]], i64 4
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[TMP16]], i32 -4
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[TMP20]], i32 -4
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP24]], i32 -4
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[TMP28]], i32 -4
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[TMP27]], i32 -4
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[TMP28]], i32 -4
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP29]], i32 -4
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[TMP30]], i32 -4
|
||||
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4
|
||||
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
; CHECK-NEXT: [[STRIDED_VEC17:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
|
||||
@@ -85,7 +74,7 @@ define ptr @test_interleave_ptradd_with_replicated_op(ptr %m) #0 {
|
||||
; CHECK-NEXT: [[TMP38:%.*]] = add <4 x i32> [[STRIDED_VEC23]], [[STRIDED_VEC22]]
|
||||
; CHECK-NEXT: [[TMP39:%.*]] = add <4 x i32> [[STRIDED_VEC26]], [[STRIDED_VEC25]]
|
||||
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <4 x i32> [[TMP36]], i32 0
|
||||
; CHECK-NEXT: store i32 [[TMP40]], ptr [[NEXT_GEP]], align 4
|
||||
; CHECK-NEXT: store i32 [[TMP40]], ptr [[NEXT_GEP12]], align 4
|
||||
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i32> [[TMP36]], i32 1
|
||||
; CHECK-NEXT: store i32 [[TMP41]], ptr [[NEXT_GEP2]], align 4
|
||||
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i32> [[TMP36]], i32 2
|
||||
@@ -93,7 +82,7 @@ define ptr @test_interleave_ptradd_with_replicated_op(ptr %m) #0 {
|
||||
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <4 x i32> [[TMP36]], i32 3
|
||||
; CHECK-NEXT: store i32 [[TMP43]], ptr [[NEXT_GEP4]], align 4
|
||||
; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i32> [[TMP37]], i32 0
|
||||
; CHECK-NEXT: store i32 [[TMP44]], ptr [[NEXT_GEP5]], align 4
|
||||
; CHECK-NEXT: store i32 [[TMP44]], ptr [[NEXT_GEP13]], align 4
|
||||
; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i32> [[TMP37]], i32 1
|
||||
; CHECK-NEXT: store i32 [[TMP45]], ptr [[NEXT_GEP6]], align 4
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <4 x i32> [[TMP37]], i32 2
|
||||
@@ -101,19 +90,19 @@ define ptr @test_interleave_ptradd_with_replicated_op(ptr %m) #0 {
|
||||
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x i32> [[TMP37]], i32 3
|
||||
; CHECK-NEXT: store i32 [[TMP47]], ptr [[NEXT_GEP8]], align 4
|
||||
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <4 x i32> [[TMP38]], i32 0
|
||||
; CHECK-NEXT: store i32 [[TMP48]], ptr [[NEXT_GEP9]], align 4
|
||||
; CHECK-NEXT: store i32 [[TMP48]], ptr [[NEXT_GEP14]], align 4
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = extractelement <4 x i32> [[TMP38]], i32 1
|
||||
; CHECK-NEXT: store i32 [[TMP49]], ptr [[NEXT_GEP10]], align 4
|
||||
; CHECK-NEXT: [[TMP50:%.*]] = extractelement <4 x i32> [[TMP38]], i32 2
|
||||
; CHECK-NEXT: store i32 [[TMP50]], ptr [[NEXT_GEP11]], align 4
|
||||
; CHECK-NEXT: [[TMP51:%.*]] = extractelement <4 x i32> [[TMP38]], i32 3
|
||||
; CHECK-NEXT: store i32 [[TMP51]], ptr [[NEXT_GEP12]], align 4
|
||||
; CHECK-NEXT: store i32 [[TMP51]], ptr [[NEXT_GEP17]], align 4
|
||||
; CHECK-NEXT: [[TMP52:%.*]] = extractelement <4 x i32> [[TMP39]], i32 0
|
||||
; CHECK-NEXT: store i32 [[TMP52]], ptr [[NEXT_GEP13]], align 4
|
||||
; CHECK-NEXT: store i32 [[TMP52]], ptr [[NEXT_GEP15]], align 4
|
||||
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i32> [[TMP39]], i32 1
|
||||
; CHECK-NEXT: store i32 [[TMP53]], ptr [[NEXT_GEP14]], align 4
|
||||
; CHECK-NEXT: store i32 [[TMP53]], ptr [[NEXT_GEP18]], align 4
|
||||
; CHECK-NEXT: [[TMP54:%.*]] = extractelement <4 x i32> [[TMP39]], i32 2
|
||||
; CHECK-NEXT: store i32 [[TMP54]], ptr [[NEXT_GEP15]], align 4
|
||||
; CHECK-NEXT: store i32 [[TMP54]], ptr [[NEXT_GEP19]], align 4
|
||||
; CHECK-NEXT: [[TMP55:%.*]] = extractelement <4 x i32> [[TMP39]], i32 3
|
||||
; CHECK-NEXT: store i32 [[TMP55]], ptr [[NEXT_GEP16]], align 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
|
||||
@@ -123,11 +112,11 @@ define ptr @test_interleave_ptradd_with_replicated_op(ptr %m) #0 {
|
||||
; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
|
||||
; CHECK: [[SCALAR_PH]]:
|
||||
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[M]], %[[ENTRY]] ]
|
||||
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 97, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
|
||||
; CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i32 [ 97, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
|
||||
; CHECK-NEXT: br label %[[LOOP:.*]]
|
||||
; CHECK: [[LOOP]]:
|
||||
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ]
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL26]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
||||
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 8
|
||||
; CHECK-NEXT: [[P_4:%.*]] = getelementptr i8, ptr [[PTR_IV]], i64 4
|
||||
; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[P_4]], align 4
|
||||
|
||||
@@ -10,6 +10,8 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x ptr> [[TMP16]], ptr [[TMP2]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
|
||||
|
||||
@@ -1055,16 +1055,10 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) {
|
||||
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 10
|
||||
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 12
|
||||
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 14
|
||||
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP3]], 2
|
||||
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 2
|
||||
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2
|
||||
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 2
|
||||
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP7]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP9]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4
|
||||
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP12]], align 4
|
||||
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP13]], align 4
|
||||
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP14]], align 4
|
||||
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
|
||||
@@ -1152,16 +1146,10 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) {
|
||||
; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; SINK-AFTER-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP3]], 2
|
||||
; SINK-AFTER-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 2
|
||||
; SINK-AFTER-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2
|
||||
; SINK-AFTER-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 2
|
||||
; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP7]]
|
||||
; SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]]
|
||||
; SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]]
|
||||
; SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP9]]
|
||||
; SINK-AFTER-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
|
||||
; SINK-AFTER-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4
|
||||
; SINK-AFTER-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP12]], align 4
|
||||
; SINK-AFTER-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP13]], align 4
|
||||
; SINK-AFTER-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP14]], align 4
|
||||
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
||||
|
||||
@@ -134,18 +134,14 @@ define void @inv_val_store_to_inv_address_conditional_inv(ptr %a, i64 %n, ptr %b
|
||||
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775804
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = insertelement <4 x i32> poison, i32 [[K]], i64 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i1> poison, i1 [[CMP]], i64 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i64 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT6]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[PREDPHI]], i64 0
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META12:![0-9]+]]
|
||||
; CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4, !alias.scope [[META12]]
|
||||
; CHECK-NEXT: store i32 [[K]], ptr [[A]], align 4, !alias.scope [[META12]]
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
|
||||
|
||||
@@ -1101,7 +1101,6 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
|
||||
; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]]
|
||||
; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
|
||||
; VEC-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP2]], align 2
|
||||
; VEC-NEXT: [[TMP4:%.*]] = add i32 [[STEP_2]], [[TMP0]]
|
||||
; VEC-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP6]]
|
||||
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
|
||||
; VEC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
|
||||
@@ -1293,8 +1292,6 @@ define i32 @iv_ext_used_outside( ptr %dst) {
|
||||
; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 0
|
||||
; VEC-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP2]], align 4
|
||||
; VEC-NEXT: [[TMP5:%.*]] = add nuw nsw <2 x i16> [[VEC_IND]], splat (i16 1)
|
||||
; VEC-NEXT: [[TMP3:%.*]] = extractelement <2 x i16> [[TMP5]], i32 0
|
||||
; VEC-NEXT: [[TMP4:%.*]] = zext nneg i16 [[TMP3]] to i32
|
||||
; VEC-NEXT: [[TMP8:%.*]] = extractelement <2 x i16> [[TMP5]], i32 1
|
||||
; VEC-NEXT: [[TMP7:%.*]] = zext nneg i16 [[TMP8]] to i32
|
||||
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
|
||||
@@ -1389,9 +1386,7 @@ define i64 @test_iv_increment_incremented(ptr %dst) {
|
||||
; VEC-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[TMP0]], i32 0
|
||||
; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i32 -1
|
||||
; VEC-NEXT: store <2 x i16> splat (i16 1), ptr [[TMP2]], align 2
|
||||
; VEC-NEXT: [[TMP3:%.*]] = add i64 2, -1
|
||||
; VEC-NEXT: [[TMP5:%.*]] = add i64 1, -1
|
||||
; VEC-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], 1
|
||||
; VEC-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 1
|
||||
; VEC-NEXT: br label %[[MIDDLE_BLOCK:.*]]
|
||||
; VEC: [[MIDDLE_BLOCK]]:
|
||||
|
||||
@@ -669,6 +669,8 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP8]], align 4
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP12]], i32 1
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
|
||||
; CHECK: pred.store.if:
|
||||
|
||||
@@ -23,19 +23,19 @@ define void @struct_return_1xi64_replicate(ptr noalias %in, ptr noalias writeonl
|
||||
; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 3
|
||||
; VF4-NEXT: [[TMP9:%.*]] = tail call { i64 } @fn1(float [[TMP8]]) #[[ATTR0]]
|
||||
; VF4-NEXT: [[TMP10:%.*]] = extractvalue { i64 } [[TMP3]], 0
|
||||
; VF4-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> poison, i64 [[TMP10]], i32 0
|
||||
; VF4-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> poison, i64 [[TMP10]], i64 0
|
||||
; VF4-NEXT: [[TMP12:%.*]] = insertvalue { <4 x i64> } poison, <4 x i64> [[TMP11]], 0
|
||||
; VF4-NEXT: [[TMP13:%.*]] = extractvalue { i64 } [[TMP5]], 0
|
||||
; VF4-NEXT: [[TMP14:%.*]] = extractvalue { <4 x i64> } [[TMP12]], 0
|
||||
; VF4-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[TMP13]], i32 1
|
||||
; VF4-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[TMP13]], i64 1
|
||||
; VF4-NEXT: [[TMP16:%.*]] = insertvalue { <4 x i64> } [[TMP12]], <4 x i64> [[TMP15]], 0
|
||||
; VF4-NEXT: [[TMP17:%.*]] = extractvalue { i64 } [[TMP7]], 0
|
||||
; VF4-NEXT: [[TMP18:%.*]] = extractvalue { <4 x i64> } [[TMP16]], 0
|
||||
; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP17]], i32 2
|
||||
; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP17]], i64 2
|
||||
; VF4-NEXT: [[TMP20:%.*]] = insertvalue { <4 x i64> } [[TMP16]], <4 x i64> [[TMP19]], 0
|
||||
; VF4-NEXT: [[TMP21:%.*]] = extractvalue { i64 } [[TMP9]], 0
|
||||
; VF4-NEXT: [[TMP22:%.*]] = extractvalue { <4 x i64> } [[TMP20]], 0
|
||||
; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP21]], i32 3
|
||||
; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP21]], i64 3
|
||||
; VF4-NEXT: [[TMP24:%.*]] = insertvalue { <4 x i64> } [[TMP20]], <4 x i64> [[TMP23]], 0
|
||||
; VF4-NEXT: [[TMP25:%.*]] = extractvalue { <4 x i64> } [[TMP24]], 0
|
||||
; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[OUT_A]], i64 [[INDEX]]
|
||||
@@ -64,22 +64,22 @@ define void @struct_return_1xi64_replicate(ptr noalias %in, ptr noalias writeonl
|
||||
; VF2IC2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP6:%.*]] = tail call { i64 } @fn1(float [[TMP5]]) #[[ATTR0]]
|
||||
; VF2IC2-NEXT: [[TMP7:%.*]] = extractvalue { i64 } [[TMP4]], 0
|
||||
; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i64 0
|
||||
; VF2IC2-NEXT: [[TMP9:%.*]] = insertvalue { <2 x i64> } poison, <2 x i64> [[TMP8]], 0
|
||||
; VF2IC2-NEXT: [[TMP10:%.*]] = extractvalue { i64 } [[TMP6]], 0
|
||||
; VF2IC2-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i64> } [[TMP9]], 0
|
||||
; VF2IC2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i64 1
|
||||
; VF2IC2-NEXT: [[TMP13:%.*]] = insertvalue { <2 x i64> } [[TMP9]], <2 x i64> [[TMP12]], 0
|
||||
; VF2IC2-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP15:%.*]] = tail call { i64 } @fn1(float [[TMP14]]) #[[ATTR0]]
|
||||
; VF2IC2-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP17:%.*]] = tail call { i64 } @fn1(float [[TMP16]]) #[[ATTR0]]
|
||||
; VF2IC2-NEXT: [[TMP18:%.*]] = extractvalue { i64 } [[TMP15]], 0
|
||||
; VF2IC2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> poison, i64 [[TMP18]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> poison, i64 [[TMP18]], i64 0
|
||||
; VF2IC2-NEXT: [[TMP20:%.*]] = insertvalue { <2 x i64> } poison, <2 x i64> [[TMP19]], 0
|
||||
; VF2IC2-NEXT: [[TMP21:%.*]] = extractvalue { i64 } [[TMP17]], 0
|
||||
; VF2IC2-NEXT: [[TMP22:%.*]] = extractvalue { <2 x i64> } [[TMP20]], 0
|
||||
; VF2IC2-NEXT: [[TMP23:%.*]] = insertelement <2 x i64> [[TMP22]], i64 [[TMP21]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP23:%.*]] = insertelement <2 x i64> [[TMP22]], i64 [[TMP21]], i64 1
|
||||
; VF2IC2-NEXT: [[TMP24:%.*]] = insertvalue { <2 x i64> } [[TMP20]], <2 x i64> [[TMP23]], 0
|
||||
; VF2IC2-NEXT: [[TMP25:%.*]] = extractvalue { <2 x i64> } [[TMP13]], 0
|
||||
; VF2IC2-NEXT: [[TMP26:%.*]] = extractvalue { <2 x i64> } [[TMP24]], 0
|
||||
@@ -133,35 +133,35 @@ define void @struct_return_2xf32_replicate(ptr noalias %in, ptr noalias writeonl
|
||||
; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 3
|
||||
; VF4-NEXT: [[TMP9:%.*]] = tail call { float, float } @fn2(float [[TMP8]]) #[[ATTR1]]
|
||||
; VF4-NEXT: [[TMP10:%.*]] = extractvalue { float, float } [[TMP3]], 0
|
||||
; VF4-NEXT: [[TMP11:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i32 0
|
||||
; VF4-NEXT: [[TMP11:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i64 0
|
||||
; VF4-NEXT: [[TMP12:%.*]] = insertvalue { <4 x float>, <4 x float> } poison, <4 x float> [[TMP11]], 0
|
||||
; VF4-NEXT: [[TMP13:%.*]] = extractvalue { float, float } [[TMP3]], 1
|
||||
; VF4-NEXT: [[TMP14:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP12]], 1
|
||||
; VF4-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP13]], i32 0
|
||||
; VF4-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP13]], i64 0
|
||||
; VF4-NEXT: [[TMP16:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP12]], <4 x float> [[TMP15]], 1
|
||||
; VF4-NEXT: [[TMP17:%.*]] = extractvalue { float, float } [[TMP5]], 0
|
||||
; VF4-NEXT: [[TMP18:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP16]], 0
|
||||
; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP17]], i32 1
|
||||
; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP17]], i64 1
|
||||
; VF4-NEXT: [[TMP20:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP16]], <4 x float> [[TMP19]], 0
|
||||
; VF4-NEXT: [[TMP21:%.*]] = extractvalue { float, float } [[TMP5]], 1
|
||||
; VF4-NEXT: [[TMP22:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP20]], 1
|
||||
; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x float> [[TMP22]], float [[TMP21]], i32 1
|
||||
; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x float> [[TMP22]], float [[TMP21]], i64 1
|
||||
; VF4-NEXT: [[TMP24:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP20]], <4 x float> [[TMP23]], 1
|
||||
; VF4-NEXT: [[TMP25:%.*]] = extractvalue { float, float } [[TMP7]], 0
|
||||
; VF4-NEXT: [[TMP26:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP24]], 0
|
||||
; VF4-NEXT: [[TMP27:%.*]] = insertelement <4 x float> [[TMP26]], float [[TMP25]], i32 2
|
||||
; VF4-NEXT: [[TMP27:%.*]] = insertelement <4 x float> [[TMP26]], float [[TMP25]], i64 2
|
||||
; VF4-NEXT: [[TMP28:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP24]], <4 x float> [[TMP27]], 0
|
||||
; VF4-NEXT: [[TMP29:%.*]] = extractvalue { float, float } [[TMP7]], 1
|
||||
; VF4-NEXT: [[TMP30:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP28]], 1
|
||||
; VF4-NEXT: [[TMP31:%.*]] = insertelement <4 x float> [[TMP30]], float [[TMP29]], i32 2
|
||||
; VF4-NEXT: [[TMP31:%.*]] = insertelement <4 x float> [[TMP30]], float [[TMP29]], i64 2
|
||||
; VF4-NEXT: [[TMP32:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP28]], <4 x float> [[TMP31]], 1
|
||||
; VF4-NEXT: [[TMP33:%.*]] = extractvalue { float, float } [[TMP9]], 0
|
||||
; VF4-NEXT: [[TMP34:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP32]], 0
|
||||
; VF4-NEXT: [[TMP35:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP33]], i32 3
|
||||
; VF4-NEXT: [[TMP35:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP33]], i64 3
|
||||
; VF4-NEXT: [[TMP36:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP32]], <4 x float> [[TMP35]], 0
|
||||
; VF4-NEXT: [[TMP37:%.*]] = extractvalue { float, float } [[TMP9]], 1
|
||||
; VF4-NEXT: [[TMP38:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP36]], 1
|
||||
; VF4-NEXT: [[TMP39:%.*]] = insertelement <4 x float> [[TMP38]], float [[TMP37]], i32 3
|
||||
; VF4-NEXT: [[TMP39:%.*]] = insertelement <4 x float> [[TMP38]], float [[TMP37]], i64 3
|
||||
; VF4-NEXT: [[TMP40:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP36]], <4 x float> [[TMP39]], 1
|
||||
; VF4-NEXT: [[TMP41:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP40]], 0
|
||||
; VF4-NEXT: [[TMP42:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP40]], 1
|
||||
@@ -194,38 +194,38 @@ define void @struct_return_2xf32_replicate(ptr noalias %in, ptr noalias writeonl
|
||||
; VF2IC2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP6:%.*]] = tail call { float, float } @fn2(float [[TMP5]]) #[[ATTR1]]
|
||||
; VF2IC2-NEXT: [[TMP7:%.*]] = extractvalue { float, float } [[TMP4]], 0
|
||||
; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i64 0
|
||||
; VF2IC2-NEXT: [[TMP9:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[TMP8]], 0
|
||||
; VF2IC2-NEXT: [[TMP10:%.*]] = extractvalue { float, float } [[TMP4]], 1
|
||||
; VF2IC2-NEXT: [[TMP11:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP9]], 1
|
||||
; VF2IC2-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP10]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP11]], float [[TMP10]], i64 0
|
||||
; VF2IC2-NEXT: [[TMP13:%.*]] = insertvalue { <2 x float>, <2 x float> } [[TMP9]], <2 x float> [[TMP12]], 1
|
||||
; VF2IC2-NEXT: [[TMP14:%.*]] = extractvalue { float, float } [[TMP6]], 0
|
||||
; VF2IC2-NEXT: [[TMP15:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP13]], 0
|
||||
; VF2IC2-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1
|
||||
; VF2IC2-NEXT: [[TMP17:%.*]] = insertvalue { <2 x float>, <2 x float> } [[TMP13]], <2 x float> [[TMP16]], 0
|
||||
; VF2IC2-NEXT: [[TMP18:%.*]] = extractvalue { float, float } [[TMP6]], 1
|
||||
; VF2IC2-NEXT: [[TMP19:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP17]], 1
|
||||
; VF2IC2-NEXT: [[TMP20:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP20:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1
|
||||
; VF2IC2-NEXT: [[TMP21:%.*]] = insertvalue { <2 x float>, <2 x float> } [[TMP17]], <2 x float> [[TMP20]], 1
|
||||
; VF2IC2-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP23:%.*]] = tail call { float, float } @fn2(float [[TMP22]]) #[[ATTR1]]
|
||||
; VF2IC2-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP25:%.*]] = tail call { float, float } @fn2(float [[TMP24]]) #[[ATTR1]]
|
||||
; VF2IC2-NEXT: [[TMP26:%.*]] = extractvalue { float, float } [[TMP23]], 0
|
||||
; VF2IC2-NEXT: [[TMP27:%.*]] = insertelement <2 x float> poison, float [[TMP26]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP27:%.*]] = insertelement <2 x float> poison, float [[TMP26]], i64 0
|
||||
; VF2IC2-NEXT: [[TMP28:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[TMP27]], 0
|
||||
; VF2IC2-NEXT: [[TMP29:%.*]] = extractvalue { float, float } [[TMP23]], 1
|
||||
; VF2IC2-NEXT: [[TMP30:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP28]], 1
|
||||
; VF2IC2-NEXT: [[TMP31:%.*]] = insertelement <2 x float> [[TMP30]], float [[TMP29]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP31:%.*]] = insertelement <2 x float> [[TMP30]], float [[TMP29]], i64 0
|
||||
; VF2IC2-NEXT: [[TMP32:%.*]] = insertvalue { <2 x float>, <2 x float> } [[TMP28]], <2 x float> [[TMP31]], 1
|
||||
; VF2IC2-NEXT: [[TMP33:%.*]] = extractvalue { float, float } [[TMP25]], 0
|
||||
; VF2IC2-NEXT: [[TMP34:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP32]], 0
|
||||
; VF2IC2-NEXT: [[TMP35:%.*]] = insertelement <2 x float> [[TMP34]], float [[TMP33]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP35:%.*]] = insertelement <2 x float> [[TMP34]], float [[TMP33]], i64 1
|
||||
; VF2IC2-NEXT: [[TMP36:%.*]] = insertvalue { <2 x float>, <2 x float> } [[TMP32]], <2 x float> [[TMP35]], 0
|
||||
; VF2IC2-NEXT: [[TMP37:%.*]] = extractvalue { float, float } [[TMP25]], 1
|
||||
; VF2IC2-NEXT: [[TMP38:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP36]], 1
|
||||
; VF2IC2-NEXT: [[TMP39:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP39:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1
|
||||
; VF2IC2-NEXT: [[TMP40:%.*]] = insertvalue { <2 x float>, <2 x float> } [[TMP36]], <2 x float> [[TMP39]], 1
|
||||
; VF2IC2-NEXT: [[TMP41:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP21]], 0
|
||||
; VF2IC2-NEXT: [[TMP42:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP40]], 0
|
||||
@@ -290,51 +290,51 @@ define void @struct_return_3xi32_replicate(ptr noalias %in, ptr noalias writeonl
|
||||
; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
|
||||
; VF4-NEXT: [[TMP9:%.*]] = tail call { i32, i32, i32 } @fn3(i32 [[TMP8]]) #[[ATTR2]]
|
||||
; VF4-NEXT: [[TMP10:%.*]] = extractvalue { i32, i32, i32 } [[TMP3]], 0
|
||||
; VF4-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP10]], i32 0
|
||||
; VF4-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP10]], i64 0
|
||||
; VF4-NEXT: [[TMP12:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> [[TMP11]], 0
|
||||
; VF4-NEXT: [[TMP13:%.*]] = extractvalue { i32, i32, i32 } [[TMP3]], 1
|
||||
; VF4-NEXT: [[TMP14:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP12]], 1
|
||||
; VF4-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP13]], i32 0
|
||||
; VF4-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP13]], i64 0
|
||||
; VF4-NEXT: [[TMP16:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP12]], <4 x i32> [[TMP15]], 1
|
||||
; VF4-NEXT: [[TMP17:%.*]] = extractvalue { i32, i32, i32 } [[TMP3]], 2
|
||||
; VF4-NEXT: [[TMP18:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP16]], 2
|
||||
; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP17]], i32 0
|
||||
; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP17]], i64 0
|
||||
; VF4-NEXT: [[TMP20:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP16]], <4 x i32> [[TMP19]], 2
|
||||
; VF4-NEXT: [[TMP21:%.*]] = extractvalue { i32, i32, i32 } [[TMP5]], 0
|
||||
; VF4-NEXT: [[TMP22:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP20]], 0
|
||||
; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i32> [[TMP22]], i32 [[TMP21]], i32 1
|
||||
; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i32> [[TMP22]], i32 [[TMP21]], i64 1
|
||||
; VF4-NEXT: [[TMP24:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP20]], <4 x i32> [[TMP23]], 0
|
||||
; VF4-NEXT: [[TMP25:%.*]] = extractvalue { i32, i32, i32 } [[TMP5]], 1
|
||||
; VF4-NEXT: [[TMP26:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP24]], 1
|
||||
; VF4-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP25]], i32 1
|
||||
; VF4-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP25]], i64 1
|
||||
; VF4-NEXT: [[TMP28:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP24]], <4 x i32> [[TMP27]], 1
|
||||
; VF4-NEXT: [[TMP29:%.*]] = extractvalue { i32, i32, i32 } [[TMP5]], 2
|
||||
; VF4-NEXT: [[TMP30:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP28]], 2
|
||||
; VF4-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> [[TMP30]], i32 [[TMP29]], i32 1
|
||||
; VF4-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> [[TMP30]], i32 [[TMP29]], i64 1
|
||||
; VF4-NEXT: [[TMP32:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP28]], <4 x i32> [[TMP31]], 2
|
||||
; VF4-NEXT: [[TMP33:%.*]] = extractvalue { i32, i32, i32 } [[TMP7]], 0
|
||||
; VF4-NEXT: [[TMP34:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP32]], 0
|
||||
; VF4-NEXT: [[TMP35:%.*]] = insertelement <4 x i32> [[TMP34]], i32 [[TMP33]], i32 2
|
||||
; VF4-NEXT: [[TMP35:%.*]] = insertelement <4 x i32> [[TMP34]], i32 [[TMP33]], i64 2
|
||||
; VF4-NEXT: [[TMP36:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP32]], <4 x i32> [[TMP35]], 0
|
||||
; VF4-NEXT: [[TMP37:%.*]] = extractvalue { i32, i32, i32 } [[TMP7]], 1
|
||||
; VF4-NEXT: [[TMP38:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP36]], 1
|
||||
; VF4-NEXT: [[TMP39:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP37]], i32 2
|
||||
; VF4-NEXT: [[TMP39:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP37]], i64 2
|
||||
; VF4-NEXT: [[TMP40:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP36]], <4 x i32> [[TMP39]], 1
|
||||
; VF4-NEXT: [[TMP41:%.*]] = extractvalue { i32, i32, i32 } [[TMP7]], 2
|
||||
; VF4-NEXT: [[TMP42:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP40]], 2
|
||||
; VF4-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> [[TMP42]], i32 [[TMP41]], i32 2
|
||||
; VF4-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> [[TMP42]], i32 [[TMP41]], i64 2
|
||||
; VF4-NEXT: [[TMP44:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP40]], <4 x i32> [[TMP43]], 2
|
||||
; VF4-NEXT: [[TMP45:%.*]] = extractvalue { i32, i32, i32 } [[TMP9]], 0
|
||||
; VF4-NEXT: [[TMP46:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP44]], 0
|
||||
; VF4-NEXT: [[TMP47:%.*]] = insertelement <4 x i32> [[TMP46]], i32 [[TMP45]], i32 3
|
||||
; VF4-NEXT: [[TMP47:%.*]] = insertelement <4 x i32> [[TMP46]], i32 [[TMP45]], i64 3
|
||||
; VF4-NEXT: [[TMP48:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP44]], <4 x i32> [[TMP47]], 0
|
||||
; VF4-NEXT: [[TMP49:%.*]] = extractvalue { i32, i32, i32 } [[TMP9]], 1
|
||||
; VF4-NEXT: [[TMP50:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP48]], 1
|
||||
; VF4-NEXT: [[TMP51:%.*]] = insertelement <4 x i32> [[TMP50]], i32 [[TMP49]], i32 3
|
||||
; VF4-NEXT: [[TMP51:%.*]] = insertelement <4 x i32> [[TMP50]], i32 [[TMP49]], i64 3
|
||||
; VF4-NEXT: [[TMP52:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP48]], <4 x i32> [[TMP51]], 1
|
||||
; VF4-NEXT: [[TMP53:%.*]] = extractvalue { i32, i32, i32 } [[TMP9]], 2
|
||||
; VF4-NEXT: [[TMP54:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP52]], 2
|
||||
; VF4-NEXT: [[TMP55:%.*]] = insertelement <4 x i32> [[TMP54]], i32 [[TMP53]], i32 3
|
||||
; VF4-NEXT: [[TMP55:%.*]] = insertelement <4 x i32> [[TMP54]], i32 [[TMP53]], i64 3
|
||||
; VF4-NEXT: [[TMP56:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP52]], <4 x i32> [[TMP55]], 2
|
||||
; VF4-NEXT: [[TMP57:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP56]], 0
|
||||
; VF4-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, ptr [[DST_A]], i64 [[INDEX]]
|
||||
@@ -371,54 +371,54 @@ define void @struct_return_3xi32_replicate(ptr noalias %in, ptr noalias writeonl
|
||||
; VF2IC2-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP6:%.*]] = tail call { i32, i32, i32 } @fn3(i32 [[TMP5]]) #[[ATTR2]]
|
||||
; VF2IC2-NEXT: [[TMP7:%.*]] = extractvalue { i32, i32, i32 } [[TMP4]], 0
|
||||
; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i64 0
|
||||
; VF2IC2-NEXT: [[TMP9:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } poison, <2 x i32> [[TMP8]], 0
|
||||
; VF2IC2-NEXT: [[TMP10:%.*]] = extractvalue { i32, i32, i32 } [[TMP4]], 1
|
||||
; VF2IC2-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP9]], 1
|
||||
; VF2IC2-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP11]], i32 [[TMP10]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP11]], i32 [[TMP10]], i64 0
|
||||
; VF2IC2-NEXT: [[TMP13:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP9]], <2 x i32> [[TMP12]], 1
|
||||
; VF2IC2-NEXT: [[TMP14:%.*]] = extractvalue { i32, i32, i32 } [[TMP4]], 2
|
||||
; VF2IC2-NEXT: [[TMP15:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], 2
|
||||
; VF2IC2-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP14]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP14]], i64 0
|
||||
; VF2IC2-NEXT: [[TMP17:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], <2 x i32> [[TMP16]], 2
|
||||
; VF2IC2-NEXT: [[TMP18:%.*]] = extractvalue { i32, i32, i32 } [[TMP6]], 0
|
||||
; VF2IC2-NEXT: [[TMP19:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP17]], 0
|
||||
; VF2IC2-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP18]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP18]], i64 1
|
||||
; VF2IC2-NEXT: [[TMP21:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP17]], <2 x i32> [[TMP20]], 0
|
||||
; VF2IC2-NEXT: [[TMP22:%.*]] = extractvalue { i32, i32, i32 } [[TMP6]], 1
|
||||
; VF2IC2-NEXT: [[TMP23:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP21]], 1
|
||||
; VF2IC2-NEXT: [[TMP24:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP22]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP24:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP22]], i64 1
|
||||
; VF2IC2-NEXT: [[TMP25:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP21]], <2 x i32> [[TMP24]], 1
|
||||
; VF2IC2-NEXT: [[TMP26:%.*]] = extractvalue { i32, i32, i32 } [[TMP6]], 2
|
||||
; VF2IC2-NEXT: [[TMP27:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP25]], 2
|
||||
; VF2IC2-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> [[TMP27]], i32 [[TMP26]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> [[TMP27]], i32 [[TMP26]], i64 1
|
||||
; VF2IC2-NEXT: [[TMP29:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP25]], <2 x i32> [[TMP28]], 2
|
||||
; VF2IC2-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[WIDE_LOAD1]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP31:%.*]] = tail call { i32, i32, i32 } @fn3(i32 [[TMP30]]) #[[ATTR2]]
|
||||
; VF2IC2-NEXT: [[TMP32:%.*]] = extractelement <2 x i32> [[WIDE_LOAD1]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP33:%.*]] = tail call { i32, i32, i32 } @fn3(i32 [[TMP32]]) #[[ATTR2]]
|
||||
; VF2IC2-NEXT: [[TMP34:%.*]] = extractvalue { i32, i32, i32 } [[TMP31]], 0
|
||||
; VF2IC2-NEXT: [[TMP35:%.*]] = insertelement <2 x i32> poison, i32 [[TMP34]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP35:%.*]] = insertelement <2 x i32> poison, i32 [[TMP34]], i64 0
|
||||
; VF2IC2-NEXT: [[TMP36:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } poison, <2 x i32> [[TMP35]], 0
|
||||
; VF2IC2-NEXT: [[TMP37:%.*]] = extractvalue { i32, i32, i32 } [[TMP31]], 1
|
||||
; VF2IC2-NEXT: [[TMP38:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP36]], 1
|
||||
; VF2IC2-NEXT: [[TMP39:%.*]] = insertelement <2 x i32> [[TMP38]], i32 [[TMP37]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP39:%.*]] = insertelement <2 x i32> [[TMP38]], i32 [[TMP37]], i64 0
|
||||
; VF2IC2-NEXT: [[TMP40:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP36]], <2 x i32> [[TMP39]], 1
|
||||
; VF2IC2-NEXT: [[TMP41:%.*]] = extractvalue { i32, i32, i32 } [[TMP31]], 2
|
||||
; VF2IC2-NEXT: [[TMP42:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP40]], 2
|
||||
; VF2IC2-NEXT: [[TMP43:%.*]] = insertelement <2 x i32> [[TMP42]], i32 [[TMP41]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP43:%.*]] = insertelement <2 x i32> [[TMP42]], i32 [[TMP41]], i64 0
|
||||
; VF2IC2-NEXT: [[TMP44:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP40]], <2 x i32> [[TMP43]], 2
|
||||
; VF2IC2-NEXT: [[TMP45:%.*]] = extractvalue { i32, i32, i32 } [[TMP33]], 0
|
||||
; VF2IC2-NEXT: [[TMP46:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP44]], 0
|
||||
; VF2IC2-NEXT: [[TMP47:%.*]] = insertelement <2 x i32> [[TMP46]], i32 [[TMP45]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP47:%.*]] = insertelement <2 x i32> [[TMP46]], i32 [[TMP45]], i64 1
|
||||
; VF2IC2-NEXT: [[TMP48:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP44]], <2 x i32> [[TMP47]], 0
|
||||
; VF2IC2-NEXT: [[TMP49:%.*]] = extractvalue { i32, i32, i32 } [[TMP33]], 1
|
||||
; VF2IC2-NEXT: [[TMP50:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP48]], 1
|
||||
; VF2IC2-NEXT: [[TMP51:%.*]] = insertelement <2 x i32> [[TMP50]], i32 [[TMP49]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP51:%.*]] = insertelement <2 x i32> [[TMP50]], i32 [[TMP49]], i64 1
|
||||
; VF2IC2-NEXT: [[TMP52:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP48]], <2 x i32> [[TMP51]], 1
|
||||
; VF2IC2-NEXT: [[TMP53:%.*]] = extractvalue { i32, i32, i32 } [[TMP33]], 2
|
||||
; VF2IC2-NEXT: [[TMP54:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP52]], 2
|
||||
; VF2IC2-NEXT: [[TMP55:%.*]] = insertelement <2 x i32> [[TMP54]], i32 [[TMP53]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP55:%.*]] = insertelement <2 x i32> [[TMP54]], i32 [[TMP53]], i64 1
|
||||
; VF2IC2-NEXT: [[TMP56:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP52]], <2 x i32> [[TMP55]], 2
|
||||
; VF2IC2-NEXT: [[TMP57:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP29]], 0
|
||||
; VF2IC2-NEXT: [[TMP58:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP56]], 0
|
||||
|
||||
@@ -229,6 +229,10 @@ define void @redundant_branch_and_blends_without_mask(ptr %A) {
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x ptr> [[TMP35]], ptr [[TMP6]], i32 1
|
||||
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x ptr> [[TMP36]], ptr [[TMP7]], i32 2
|
||||
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x ptr> [[TMP37]], ptr [[TMP8]], i32 3
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF]]:
|
||||
|
||||
Reference in New Issue
Block a user