Files
clang-p2996/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
Rin df8e0d057d [AArch64][LoopVectorize] Use upper bound trip count instead of the constant TC when choosing max VF (#67697)
This patch is based off of
https://github.com/llvm/llvm-project/pull/67543.

We are currently using the exact trip count to make decisions regarding
the maximum VF. We can instead use the upper bound TC, which will be the
same as the constant trip count when that is known.
2023-10-09 16:26:19 +01:00

55 lines
2.3 KiB
LLVM

; RUN: opt -S < %s -passes=loop-vectorize -mtriple aarch64-linux-gnu -mattr=+sve 2>&1 | FileCheck %s
define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val){
; CHECK-LABEL: define void @clamped_tc_8
; CHECK: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> {{.*}}, ptr {{.*}}, i32 1, <vscale x 8 x i1> {{.*}})
entry:
br label %for.body
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%p_out_tail.09 = phi ptr [ %dst, %entry ], [ %incdec.ptr, %for.body ]
%0 = shl nuw nsw i64 %indvars.iv, 3
%shr3 = lshr i64 %val, %0
%conv4 = trunc i64 %shr3 to i8
store i8 %conv4, ptr %p_out_tail.09, align 1
%incdec.ptr = getelementptr inbounds i8, ptr %p_out_tail.09, i64 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, 8
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
}
define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val){
; CHECK-LABEL: define void @clamped_tc_max_8(
; CHECK: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> {{.*}}, ptr {{.*}}, i32 1, <vscale x 8 x i1> {{.*}})
entry:
%rem = and i32 %n, 63
%cmp8.not = icmp eq i32 %rem, 0
br i1 %cmp8.not, label %for.cond.cleanup, label %for.body.preheader
for.body.preheader: ; preds = %entry
%add = add nuw nsw i32 %rem, 7
%shr = lshr i32 %add, 3
%wide.trip.count = zext i32 %shr to i64
br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
%p_out_tail.09 = phi ptr [ %dst, %for.body.preheader ], [ %incdec.ptr, %for.body ]
%0 = shl nuw nsw i64 %indvars.iv, 3
%shr3 = lshr i64 %val, %0
%conv4 = trunc i64 %shr3 to i8
store i8 %conv4, ptr %p_out_tail.09, align 1
%incdec.ptr = getelementptr inbounds i8, ptr %p_out_tail.09, i64 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
}