[LV] Mark dead instructions in loop as free.

Update collectValuesToIgnore to also ignore dead instructions in the
loop. Such instructions will be removed by VPlan-based DCE and won't be
considered by the VPlan-based cost model.

This closes a gap between the legacy and VPlan-based cost model. In
practice with the default pipelines, there shouldn't be any dead
instructions in loops reaching LoopVectorize, but it is easy to generate
such cases by hand or automatically via fuzzers.

Fixes https://github.com/llvm/llvm-project/issues/99701.
This commit is contained in:
Florian Hahn
2024-07-24 09:31:32 +01:00
parent 9a25866402
commit ba8126b6fe
2 changed files with 130 additions and 0 deletions

View File

@@ -137,6 +137,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
@@ -6681,6 +6682,7 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
SmallVector<Value *, 4> DeadInterleavePointerOps;
SmallVector<Value *, 4> DeadOps;
for (BasicBlock *BB : TheLoop->blocks())
for (Instruction &I : *BB) {
// Find all stores to invariant variables. Since they are going to sink
@@ -6690,6 +6692,17 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
Legal->isInvariantAddressOfReduction(SI->getPointerOperand()))
ValuesToIgnore.insert(&I);
if (VecValuesToIgnore.contains(&I) || ValuesToIgnore.contains(&I))
continue;
// Add instructions that would be trivially dead and are only used by
// values already ignored to DeadOps to seed worklist.
if (wouldInstructionBeTriviallyDead(&I, TLI) &&
all_of(I.users(), [this](User *U) {
return VecValuesToIgnore.contains(U) || ValuesToIgnore.contains(U);
}))
DeadOps.push_back(&I);
// For interleave groups, we only create a pointer for the start of the
// interleave group. Queue up addresses of group members except the insert
// position for further processing.
@@ -6717,6 +6730,29 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
DeadInterleavePointerOps.append(Op->op_begin(), Op->op_end());
}
// Mark ops that would be trivially dead and are only used by ignored
// instructions as free.
for (unsigned I = 0; I != DeadOps.size(); ++I) {
auto *Op = dyn_cast<Instruction>(DeadOps[I]);
// Skip any op that shouldn't be considered dead.
if (!Op || !TheLoop->contains(Op) ||
!wouldInstructionBeTriviallyDead(Op, TLI) ||
any_of(Op->users(), [this](User *U) {
return !VecValuesToIgnore.contains(U) && !ValuesToIgnore.contains(U);
}))
continue;
// If all of Op's users are in ValuesToIgnore, add it to ValuesToIgnore
// which applies for both scalar and vector versions. Otherwise it is only
// dead in vector versions, so only add it to VecValuesToIgnore.
if (all_of(Op->users(),
[this](User *U) { return ValuesToIgnore.contains(U); }))
ValuesToIgnore.insert(Op);
VecValuesToIgnore.insert(Op);
DeadOps.append(Op->op_begin(), Op->op_end());
}
// Ignore type-promoting instructions we identified during reduction
// detection.
for (const auto &Reduction : Legal->getReductionVars()) {

View File

@@ -0,0 +1,94 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p loop-vectorize -mtriple riscv64-linux-gnu -mattr=+v,+f -S %s | FileCheck %s
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
; Test with a dead load in the loop, from
; https://github.com/llvm/llvm-project/issues/99701
define void @dead_load(ptr %p, i16 %start) {
; CHECK-LABEL: define void @dead_load(
; CHECK-SAME: ptr [[P:%.*]], i16 [[START:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[START_EXT:%.*]] = sext i16 [[START]] to i64
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[START_EXT]], i64 111)
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[SMAX]], [[START_EXT]]
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 1)
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[SMAX]], [[UMIN]]
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[START_EXT]]
; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP2]], 3
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[UMIN]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 1
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP5]], [[TMP7]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP5]], [[TMP9]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP5]], [[TMP11]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[N_VEC]], 3
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START_EXT]], [[TMP12]]
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 8
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[START_EXT]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i64> @llvm.experimental.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 8 x i64> [[TMP15]], zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 8 x i64> [[TMP16]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 3, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> [[DOTSPLAT]], [[TMP17]]
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP19]]
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP20]], i64 0
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT1]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[P]], <vscale x 8 x i64> [[VEC_IND]]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP21]], i32 2, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT2]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START_EXT]], %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[P]], i64 [[IV]]
; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2
; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 3
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV]], 111
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
%start.ext = sext i16 %start to i64
br label %loop
loop:
%iv = phi i64 [ %start.ext, %entry ], [ %iv.next, %loop ]
%gep = getelementptr i16, ptr %p, i64 %iv
store i16 0, ptr %gep, align 2
%l = load i16, ptr %gep, align 2
%iv.next = add i64 %iv, 3
%cmp = icmp slt i64 %iv, 111
br i1 %cmp, label %loop, label %exit
exit:
ret void
}
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
;.