Files
clang-p2996/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll
Nikita Popov 2fab927546 [LoopVectorize] Convert some tests to opaque pointers (NFC)
Check lines for some of these tests were regenerated. The difference
is that with opaque pointers SCEVExpander always emits i8 GEPs,
making the address calculation explicit. This is a known problem
that will be solved long term by making all address calculations
explicit.
2023-01-04 17:25:42 +01:00

268 lines
23 KiB
LLVM

; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s
; REQUIRES: asserts
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-arm-none-eabi"
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx = getelementptr inbounds i16, ptr %s, i32 %i.016
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %1 = load i16, ptr %arrayidx, align 2
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = sext i16 %1 to i32
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %cmp2 = icmp sgt i32 %conv, %conv1
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: br i1 %cmp2, label %if.then, label %for.inc
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv6 = add i16 %1, %0
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx7 = getelementptr inbounds i16, ptr %d, i32 %i.016
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %conv6, ptr %arrayidx7, align 2
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: br label %for.inc
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %inc = add nuw nsw i32 %i.016, 1
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %exitcond.not = icmp eq i32 %inc, %n
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
; CHECK: LV: Scalar loop costs: 5.
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %arrayidx = getelementptr inbounds i16, ptr %s, i32 %i.016
; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %1 = load i16, ptr %arrayidx, align 2
; CHECK: LV: Found an estimated cost of 4 for VF 2 For instruction: %conv = sext i16 %1 to i32
; CHECK: LV: Found an estimated cost of 20 for VF 2 For instruction: %cmp2 = icmp sgt i32 %conv, %conv1
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br i1 %cmp2, label %if.then, label %for.inc
; CHECK: LV: Found an estimated cost of 26 for VF 2 For instruction: %conv6 = add i16 %1, %0
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %arrayidx7 = getelementptr inbounds i16, ptr %d, i32 %i.016
; CHECK: LV: Found an estimated cost of 16 for VF 2 For instruction: store i16 %conv6, ptr %arrayidx7, align 2
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br label %for.inc
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %inc = add nuw nsw i32 %i.016, 1
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %exitcond.not = icmp eq i32 %inc, %n
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
; CHECK: LV: Vector loop of width 2 costs: 43.
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %arrayidx = getelementptr inbounds i16, ptr %s, i32 %i.016
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %1 = load i16, ptr %arrayidx, align 2
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv = sext i16 %1 to i32
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %cmp2 = icmp sgt i32 %conv, %conv1
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: br i1 %cmp2, label %if.then, label %for.inc
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %conv6 = add i16 %1, %0
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %arrayidx7 = getelementptr inbounds i16, ptr %d, i32 %i.016
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %conv6, ptr %arrayidx7, align 2
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: br label %for.inc
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %inc = add nuw nsw i32 %i.016, 1
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %exitcond.not = icmp eq i32 %inc, %n
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
; CHECK: LV: Vector loop of width 4 costs: 2.
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %arrayidx = getelementptr inbounds i16, ptr %s, i32 %i.016
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %1 = load i16, ptr %arrayidx, align 2
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv = sext i16 %1 to i32
; CHECK: LV: Found an estimated cost of 36 for VF 8 For instruction: %cmp2 = icmp sgt i32 %conv, %conv1
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: br i1 %cmp2, label %if.then, label %for.inc
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv6 = add i16 %1, %0
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %arrayidx7 = getelementptr inbounds i16, ptr %d, i32 %i.016
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: store i16 %conv6, ptr %arrayidx7, align 2
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: br label %for.inc
; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction: %inc = add nuw nsw i32 %i.016, 1
; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction: %exitcond.not = icmp eq i32 %inc, %n
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
; CHECK: LV: Vector loop of width 8 costs: 5.
; CHECK: LV: Selecting VF: 4.
define void @expensive_icmp(ptr noalias nocapture %d, ptr nocapture readonly %s, i32 %n, i16 zeroext %m) #0 {
entry:
%cmp15 = icmp sgt i32 %n, 0
br i1 %cmp15, label %for.body.lr.ph, label %for.cond.cleanup
for.body.lr.ph: ; preds = %entry
%conv1 = zext i16 %m to i32
%0 = trunc i32 %n to i16
br label %for.body
for.cond.cleanup: ; preds = %for.inc, %entry
ret void
for.body: ; preds = %for.body.lr.ph, %for.inc
%i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
%arrayidx = getelementptr inbounds i16, ptr %s, i32 %i.016
%1 = load i16, ptr %arrayidx, align 2
%conv = sext i16 %1 to i32
%cmp2 = icmp sgt i32 %conv, %conv1
br i1 %cmp2, label %if.then, label %for.inc
if.then: ; preds = %for.body
%conv6 = add i16 %1, %0
%arrayidx7 = getelementptr inbounds i16, ptr %d, i32 %i.016
store i16 %conv6, ptr %arrayidx7, align 2
br label %for.inc
for.inc: ; preds = %for.body, %if.then
%inc = add nuw nsw i32 %i.016, 1
%exitcond.not = icmp eq i32 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i8, ptr %pSrcA.addr.011, align 1
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv1 = sext i8 %0 to i32
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %1 = load i8, ptr %pSrcB.addr.09, align 1
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv3 = sext i8 %1 to i32
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nsw i32 %conv3, %conv1
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %shr = ashr i32 %mul, 7
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %2 = icmp slt i32 %shr, 127
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv4 = trunc i32 %spec.select.i to i8
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %conv4, ptr %pDst.addr.010, align 1
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %dec = add i32 %blkCnt.012, -1
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %cmp.not = icmp eq i32 %dec, 0
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: br i1 %cmp.not, label %while.end.loopexit, label %while.body
; CHECK: LV: Scalar loop costs: 9.
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %0 = load i8, ptr %pSrcA.addr.011, align 1
; CHECK: LV: Found an estimated cost of 4 for VF 2 For instruction: %conv1 = sext i8 %0 to i32
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %1 = load i8, ptr %pSrcB.addr.09, align 1
; CHECK: LV: Found an estimated cost of 4 for VF 2 For instruction: %conv3 = sext i8 %1 to i32
; CHECK: LV: Found an estimated cost of 26 for VF 2 For instruction: %mul = mul nsw i32 %conv3, %conv1
; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %shr = ashr i32 %mul, 7
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %2 = icmp slt i32 %shr, 127
; CHECK: LV: Found an estimated cost of 22 for VF 2 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv4 = trunc i32 %spec.select.i to i8
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: store i8 %conv4, ptr %pDst.addr.010, align 1
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %dec = add i32 %blkCnt.012, -1
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %cmp.not = icmp eq i32 %dec, 0
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br i1 %cmp.not, label %while.end.loopexit, label %while.body
; CHECK: LV: Vector loop of width 2 costs: 65.
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %0 = load i8, ptr %pSrcA.addr.011, align 1
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv1 = sext i8 %0 to i32
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %1 = load i8, ptr %pSrcB.addr.09, align 1
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv3 = sext i8 %1 to i32
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %mul = mul nsw i32 %conv3, %conv1
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %shr = ashr i32 %mul, 7
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %2 = icmp slt i32 %shr, 127
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv4 = trunc i32 %spec.select.i to i8
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %conv4, ptr %pDst.addr.010, align 1
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %dec = add i32 %blkCnt.012, -1
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %cmp.not = icmp eq i32 %dec, 0
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: br i1 %cmp.not, label %while.end.loopexit, label %while.body
; CHECK: LV: Vector loop of width 4 costs: 3.
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %0 = load i8, ptr %pSrcA.addr.011, align 1
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv1 = sext i8 %0 to i32
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %1 = load i8, ptr %pSrcB.addr.09, align 1
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv3 = sext i8 %1 to i32
; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %mul = mul nsw i32 %conv3, %conv1
; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %shr = ashr i32 %mul, 7
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %2 = icmp slt i32 %shr, 127
; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv4 = trunc i32 %spec.select.i to i8
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %conv4, ptr %pDst.addr.010, align 1
; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction: %dec = add i32 %blkCnt.012, -1
; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction: %cmp.not = icmp eq i32 %dec, 0
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: br i1 %cmp.not, label %while.end.loopexit, label %while.body
; CHECK: LV: Vector loop of width 8 costs: 3.
; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction: %0 = load i8, ptr %pSrcA.addr.011, align 1
; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: %conv1 = sext i8 %0 to i32
; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction: %1 = load i8, ptr %pSrcB.addr.09, align 1
; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: %conv3 = sext i8 %1 to i32
; CHECK: LV: Found an estimated cost of 8 for VF 16 For instruction: %mul = mul nsw i32 %conv3, %conv1
; CHECK: LV: Found an estimated cost of 8 for VF 16 For instruction: %shr = ashr i32 %mul, 7
; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %2 = icmp slt i32 %shr, 127
; CHECK: LV: Found an estimated cost of 8 for VF 16 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127
; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: %conv4 = trunc i32 %spec.select.i to i8
; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction: store i8 %conv4, ptr %pDst.addr.010, align 1
; CHECK: LV: Found an estimated cost of 1 for VF 16 For instruction: %dec = add i32 %blkCnt.012, -1
; CHECK: LV: Found an estimated cost of 1 for VF 16 For instruction: %cmp.not = icmp eq i32 %dec, 0
; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: br i1 %cmp.not, label %while.end.loopexit, label %while.body
; CHECK: LV: Vector loop of width 16 costs: 3.
; CHECK: LV: Selecting VF: 16.
define void @cheap_icmp(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, ptr nocapture %pDst, i32 %blockSize) #0 {
entry:
%cmp.not8 = icmp eq i32 %blockSize, 0
br i1 %cmp.not8, label %while.end, label %while.body.preheader
while.body.preheader: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body.preheader, %while.body
%blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
%pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
%pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
%pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
%incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
%0 = load i8, ptr %pSrcA.addr.011, align 1
%conv1 = sext i8 %0 to i32
%incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
%1 = load i8, ptr %pSrcB.addr.09, align 1
%conv3 = sext i8 %1 to i32
%mul = mul nsw i32 %conv3, %conv1
%shr = ashr i32 %mul, 7
%2 = icmp slt i32 %shr, 127
%spec.select.i = select i1 %2, i32 %shr, i32 127
%conv4 = trunc i32 %spec.select.i to i8
%incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
store i8 %conv4, ptr %pDst.addr.010, align 1
%dec = add i32 %blkCnt.012, -1
%cmp.not = icmp eq i32 %dec, 0
br i1 %cmp.not, label %while.end.loopexit, label %while.body
while.end.loopexit: ; preds = %while.body
br label %while.end
while.end: ; preds = %while.end.loopexit, %entry
ret void
}
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %cmp1 = fcmp
; CHECK: LV: Found an estimated cost of 12 for VF 2 For instruction: %cmp1 = fcmp
; CHECK: LV: Found an estimated cost of 24 for VF 4 For instruction: %cmp1 = fcmp
define void @floatcmp(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) #0 {
entry:
%cmp.not7 = icmp eq i32 %blockSize, 0
br i1 %cmp.not7, label %while.end, label %while.body
while.body: ; preds = %entry, %while.body
%pSrc.addr.010 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrc, %entry ]
%blockSize.addr.09 = phi i32 [ %dec, %while.body ], [ %blockSize, %entry ]
%pDst.addr.08 = phi ptr [ %incdec.ptr, %while.body ], [ %pDst, %entry ]
%0 = load float, ptr %pSrc.addr.010, align 4
%cmp1 = fcmp nnan ninf nsz olt float %0, 0.000000e+00
%cond = select nnan ninf nsz i1 %cmp1, float 1.000000e+01, float %0
%conv = fptosi float %cond to i32
%incdec.ptr = getelementptr inbounds i32, ptr %pDst.addr.08, i32 1
store i32 %conv, ptr %pDst.addr.08, align 4
%incdec.ptr2 = getelementptr inbounds float, ptr %pSrc.addr.010, i32 1
%dec = add i32 %blockSize.addr.09, -1
%cmp.not = icmp eq i32 %dec, 0
br i1 %cmp.not, label %while.end, label %while.body
while.end: ; preds = %while.body, %entry
ret void
}
attributes #0 = { "target-features"="+mve" }