Files
clang-p2996/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll
David Green ab97c9bdb7 [LV] Fix scalar cost for tail predicated loops
When it comes to the scalar cost of any predicated block, the loop
vectorizer by default regards this predication as a sign that it is
looking at an if-conversion and divides the scalar cost of the block by
2, assuming it would only be executed half the time. This however makes
no sense if the predication has been introduced to tail predicate the
loop.

Original patch by Anna Welker

Differential Revision: https://reviews.llvm.org/D86452
2020-12-12 14:21:40 +00:00

102 lines
6.2 KiB
LLVM

; Checks the loop vectorizer's scalar (VF 1) cost estimates, as printed in its
; debug output, for the two loops defined below:
;   @pred_loop  - a loop whose body is a single basic block;
;   @if_convert - a loop containing a conditionally executed block.
; Tail predication is force-enabled for the whole run. No IR is emitted
; (-disable-output); only the debug stream, merged into stdout by 2>&1, is
; matched, using one check prefix per function.
; RUN: opt -loop-vectorize -debug-only=loop-vectorize -enable-arm-maskedgatscat -tail-predication=force-enabled -disable-output < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-COST,CHECK-COST-2
; The -debug-only option is only available in builds with assertions enabled,
; hence the requirement on the next line.
; REQUIRES: asserts
; Target: 32-bit little-endian Thumb, Armv8.1-M Mainline.
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-none-none-eabi"
; Single-block counted loop: for i in [0, n) it loads data[i + 1], adds 5 and
; stores the result to dst[i]. The loop is skipped entirely when n <= 0.
; The %off argument is not used by the body.
;
; The expected output below pins the scalar (VF 1) cost of every instruction in
; the loop and their total: 1 (add) + 1 (load) + 1 (add) + 1 (store) +
; 1 (icmp) = 5. In other words the block is expected to be costed in full,
; even though tail predication is force-enabled on the opt command line.
;
; NOTE(review): the expected output names %for.body.preheader and
; %exit.loopexit, which do not appear in the IR below; presumably they are
; created when the loop is put into simplified form before costing - confirm.
define void @pred_loop(i32* %off, i32* %data, i32* %dst, i32 %n) #0 {
; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %i.09 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
; CHECK-COST-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: %add = add nuw nsw i32 %i.09, 1
; CHECK-COST-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx = getelementptr inbounds i32, i32* %data, i32 %add
; CHECK-COST-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i32, i32* %arrayidx, align 4
; CHECK-COST-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: %add1 = add nsw i32 %0, 5
; CHECK-COST-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx2 = getelementptr inbounds i32, i32* %dst, i32 %i.09
; CHECK-COST-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %add1, i32* %arrayidx2, align 4
; CHECK-COST-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: %exitcond.not = icmp eq i32 %add, %n
; CHECK-COST-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction: br i1 %exitcond.not, label %exit.loopexit, label %for.body
; CHECK-COST-NEXT: LV: Scalar loop costs: 5.
entry:
; Guard: only enter the loop when n > 0 (signed).
%cmp8 = icmp sgt i32 %n, 0
br i1 %cmp8, label %for.body, label %exit
exit: ; preds = %for.body, %entry
ret void
for.body: ; preds = %entry, %for.body
; %i.09 is the induction variable i; %add is i + 1 and serves both as the load
; index into %data and as the next value of i.
%i.09 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%add = add nuw nsw i32 %i.09, 1
%arrayidx = getelementptr inbounds i32, i32* %data, i32 %add
%0 = load i32, i32* %arrayidx, align 4
%add1 = add nsw i32 %0, 5
%arrayidx2 = getelementptr inbounds i32, i32* %dst, i32 %i.09
store i32 %add1, i32* %arrayidx2, align 4
; Leave the loop once i + 1 == n.
%exitcond.not = icmp eq i32 %add, %n
br i1 %exitcond.not, label %exit, label %for.body
}
; Loop with a conditionally executed block: for i in [start, end) it loads a[i]
; and b[i]. When a[i] > b[i] (signed) the if.then block stores
; (a[i] - b[i]) + 2 * (5 * a[i] + 3) to b[i]. Afterwards a[i] is overwritten
; with 5 * a[i] + 3 if the branch was taken, or with its old value otherwise.
; The loop is skipped when start >= end. The function returns undef, and the
; return value is not inspected by the expected output.
;
; The expected output below pins the scalar (VF 1) cost of every instruction
; and the loop total. Per block the listed costs add up to 3 (for.body),
; 5 (if.then) and 3 (if.end); the expected total of 8 is consistent with the
; conditionally executed if.then block being counted at half its cost,
; rounded down (3 + 5/2 + 3 = 8).
define i32 @if_convert(i32* %a, i32* %b, i32 %start, i32 %end) #0 {
; CHECK-COST-2: LV: Found an estimated cost of 0 for VF 1 For instruction: %i.032 = phi i32 [ %inc, %if.end ], [ %start, %for.body.preheader ]
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.032
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i32, i32* %arrayidx, align 4
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx2 = getelementptr inbounds i32, i32* %b, i32 %i.032
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: %1 = load i32, i32* %arrayidx2, align 4
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: %cmp3 = icmp sgt i32 %0, %1
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction: br i1 %cmp3, label %if.then, label %if.end
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nsw i32 %0, 5
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: %add = add nsw i32 %mul, 3
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction: %factor = shl i32 %add, 1
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: %sub = sub i32 %0, %1
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: %add7 = add i32 %sub, %factor
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %add7, i32* %arrayidx2, align 4
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction: br label %if.end
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction: %k.0 = phi i32 [ %add, %if.then ], [ %0, %for.body ]
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %k.0, i32* %arrayidx, align 4
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: %inc = add nsw i32 %i.032, 1
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 1 for VF 1 For instruction: %exitcond.not = icmp eq i32 %inc, %end
; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction: br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
; CHECK-COST-2-NEXT: LV: Scalar loop costs: 8.
entry:
; Guard: only enter the loop when start < end (signed).
%cmp31 = icmp slt i32 %start, %end
br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry
br label %for.body
for.cond.cleanup.loopexit: ; preds = %if.end
br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
ret i32 undef
for.body: ; preds = %for.body.preheader, %if.end
; Unconditional part: load a[i] (%0) and b[i] (%1) and compare them.
%i.032 = phi i32 [ %inc, %if.end ], [ %start, %for.body.preheader ]
%arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.032
%0 = load i32, i32* %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, i32* %b, i32 %i.032
%1 = load i32, i32* %arrayidx2, align 4
%cmp3 = icmp sgt i32 %0, %1
br i1 %cmp3, label %if.then, label %if.end
if.then: ; preds = %for.body
; Conditional part, executed only when a[i] > b[i]; includes a store to b[i].
%mul = mul nsw i32 %0, 5
%add = add nsw i32 %mul, 3
%factor = shl i32 %add, 1
%sub = sub i32 %0, %1
%add7 = add i32 %sub, %factor
store i32 %add7, i32* %arrayidx2, align 4
br label %if.end
if.end: ; preds = %if.then, %for.body
; %k.0 merges the value computed in if.then with the original a[i]; it is
; written back to a[i] on every iteration.
%k.0 = phi i32 [ %add, %if.then ], [ %0, %for.body ]
store i32 %k.0, i32* %arrayidx, align 4
%inc = add nsw i32 %i.032, 1
; Leave the loop once i + 1 == end.
%exitcond.not = icmp eq i32 %inc, %end
br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
}
; Attribute group #0 is attached to both functions above. It selects an
; Armv8.1-M Mainline Thumb feature set that includes MVE (+mve, +mve.fp) and
; +lob, together with +strict-align.
attributes #0 = { "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16sp,+fp16,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp"}