Rather than maintaining two separate values, a `float` for the per-lane cost and a Width for the VF, maintain a single VectorizationFactor that comprises both and also removes the need to convert an integer value to float. This simplifies the query that asks whether one VF is more profitable than another, which matters when we extend this to scalable vectors (which may require additional options to determine if, e.g., a scalable VF of the same cost is more profitable than a fixed VF of the same cost). The patch isn't entirely NFC because it also fixes an issue in selectEpilogueVectorizationFactor: the cost passed to ProfitableVFs is no longer truncated from `float` to `unsigned` before the calculation is performed on the truncated cost. It now does the cost comparison with the correct precision. Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D100121
119 lines
4.0 KiB
LLVM
119 lines
4.0 KiB
LLVM
; Both pipelines run the loop vectorizer in opt and then lower the result with
; llc, so the checks in this file inspect the generated assembly.
; The first pipeline enables only +avx512f. The second passes
; +avx512vl,+prefer-256-bit to opt (llc still receives just +avx512f) and is
; verified under a separate check prefix.
; RUN: opt -mattr=+avx512f --loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s
; RUN: opt -mattr=+avx512vl,+prefer-256-bit --loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s --check-prefix=CHECK-PREFER-AVX256

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
; Verify that we generate 512-bit wide vectors for a basic integer memset
; loop.

; CHECK-LABEL: f:
; CHECK: vmovdqu64 %zmm{{.}},
; CHECK-NOT: %ymm
; CHECK: epilog
; CHECK: %ymm

; Verify that we don't generate 512-bit wide vectors when subtarget feature says not to

; CHECK-PREFER-AVX256-LABEL: f:
; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
; CHECK-PREFER-AVX256-NOT: %zmm

; @f writes the value %n to a[0] .. a[n-1]. It carries no function attributes,
; so the vector width is decided by the subtarget features on the command line.
define void @f(i32* %a, i32 %n) {
entry:
  ; Skip the loop entirely when %n is not positive.
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  ; 64-bit induction variable, starting at 0 and stepping by 1.
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  store i32 %n, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test compares the incremented index, truncated to i32, with %n.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}
; Verify that the "prefer-vector-width=256" attribute prevents the use of 512-bit
; vectors

; CHECK-LABEL: g:
; CHECK: vmovdqu %ymm{{.}},
; CHECK-NOT: %zmm

; CHECK-PREFER-AVX256-LABEL: g:
; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
; CHECK-PREFER-AVX256-NOT: %zmm

; @g has the same loop as @f (write %n to a[0] .. a[n-1]); the only difference
; is the "prefer-vector-width"="256" function attribute.
define void @g(i32* %a, i32 %n) "prefer-vector-width"="256" {
entry:
  ; Skip the loop entirely when %n is not positive.
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  ; 64-bit induction variable, starting at 0 and stepping by 1.
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  store i32 %n, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test compares the incremented index, truncated to i32, with %n.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}
; Verify that the "prefer-vector-width=512" attribute overrides the subtarget
; feature that prefers 256-bit vectors, so 512-bit vectors are still generated.

; CHECK-LABEL: h:
; CHECK: vmovdqu64 %zmm{{.}},
; CHECK-NOT: %ymm
; CHECK: epilog
; CHECK: %ymm

; CHECK-PREFER-AVX256-LABEL: h:
; CHECK-PREFER-AVX256: vmovdqu64 %zmm{{.}},
; CHECK-PREFER-AVX256-NOT: %ymm
; CHECK-PREFER-AVX256: epilog
; CHECK-PREFER-AVX256: %ymm

; @h has the same loop as @f (write %n to a[0] .. a[n-1]); the only difference
; is the "prefer-vector-width"="512" function attribute.
define void @h(i32* %a, i32 %n) "prefer-vector-width"="512" {
entry:
  ; Skip the loop entirely when %n is not positive.
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  ; 64-bit induction variable, starting at 0 and stepping by 1.
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  store i32 %n, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test compares the incremented index, truncated to i32, with %n.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}