Files
clang-p2996/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll
Nikita Popov 2fab927546 [LoopVectorize] Convert some tests to opaque pointers (NFC)
Check lines for some of these tests were regenerated. The difference
is that with opaque pointers SCEVExpander always emits i8 GEPs,
making the address calculation explicit. This is a known problem
that will be solved long term by making all address calculations
explicit.
2023-01-04 17:25:42 +01:00

68 lines
4.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize,instcombine,simplifycfg < %s -S -o - | FileCheck %s --check-prefix=CHECK
; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=CHECK-COST
; REQUIRES: asserts
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-arm-none-eabi"
; CHECK-COST-LABEL: arm_offset_q15
; CHECK-COST: Found an estimated cost of 7 for VF 1 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset)
; CHECK-COST: Found an estimated cost of 36 for VF 2 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset)
; CHECK-COST: Found an estimated cost of 8 for VF 4 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset)
; CHECK-COST: Found an estimated cost of 2 for VF 8 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset)
define void @arm_offset_q15(ptr nocapture readonly %pSrc, i16 signext %offset, ptr nocapture noalias %pDst, i32 %blockSize) #0 {
; CHECK-LABEL: @arm_offset_q15(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP_NOT6:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP_NOT6]], label [[WHILE_END:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[BLOCKSIZE]], 7
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <8 x i16> poison, i16 [[OFFSET:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT6]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i32 [[TMP1]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[BLOCKSIZE]])
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[NEXT_GEP]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_MASKED_LOAD]], <8 x i16> [[BROADCAST_SPLAT7]])
; CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[TMP2]], ptr [[NEXT_GEP5]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP3]], label [[WHILE_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: while.end:
; CHECK-NEXT: ret void
;
entry:
%cmp.not6 = icmp eq i32 %blockSize, 0
br i1 %cmp.not6, label %while.end, label %while.body
while.body: ; preds = %entry, %while.body
%blkCnt.09 = phi i32 [ %dec, %while.body ], [ %blockSize, %entry ]
%pSrc.addr.08 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %entry ]
%pDst.addr.07 = phi ptr [ %incdec.ptr3, %while.body ], [ %pDst, %entry ]
%incdec.ptr = getelementptr inbounds i16, ptr %pSrc.addr.08, i32 1
%0 = load i16, ptr %pSrc.addr.08, align 2
%1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset)
%incdec.ptr3 = getelementptr inbounds i16, ptr %pDst.addr.07, i32 1
store i16 %1, ptr %pDst.addr.07, align 2
%dec = add i32 %blkCnt.09, -1
%cmp.not = icmp eq i32 %dec, 0
br i1 %cmp.not, label %while.end, label %while.body
while.end: ; preds = %while.body, %entry
ret void
}
declare i16 @llvm.sadd.sat.i16(i16, i16)
attributes #0 = { "target-features"="+mve" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-COST: {{.*}}