Files
clang-p2996/llvm/test/Transforms/LoopVectorize/X86/avx512.ll
Sander de Smalen 86729538bd [LV] Let selectVectorizationFactor reason directly on VectorizationFactor.
Rather than maintaining two separate values, a `float` for the per-lane
cost and a Width for the VF, maintain a single VectorizationFactor which
comprises the two and also removes the need for converting an integer value
to float.

This simplifies the query when asking if one VF is more profitable than
another when we want to extend this for scalable vectors (which may
require additional options to determine if e.g. a scalable VF of the
some cost, is more profitable than a fixed VF of the same cost).

The patch isn't entirely NFC because it also fixes an issue in
selectEpilogueVectorizationFactor, where the cost passed to ProfitableVFs
no longer truncates the floating-point cost from `float` to `unsigned` to
then perform the calculation on the truncated cost. It now does
a cost comparison with the correct precision.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D100121
2021-04-20 09:54:45 +01:00

119 lines
4.0 KiB
LLVM

; RUN: opt -mattr=+avx512f --loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s
; RUN: opt -mattr=+avx512vl,+prefer-256-bit --loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s --check-prefix=CHECK-PREFER-AVX256
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
; Verify that we generate 512-bit wide vectors for a basic integer memset
; loop.
; CHECK-LABEL: f:
; CHECK: vmovdqu64 %zmm{{.}},
; CHECK-NOT: %ymm
; CHECK: epilog
; CHECK: %ymm
; Verify that we don't generate 512-bit wide vectors when subtarget feature says not to
; CHECK-PREFER-AVX256-LABEL: f:
; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
; CHECK-PREFER-AVX256-NOT: %zmm
define void @f(i32* %a, i32 %n) {
entry:
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body.preheader, label %for.end
for.body.preheader: ; preds = %entry
br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %n, i32* %arrayidx, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end.loopexit, label %for.body
for.end.loopexit: ; preds = %for.body
br label %for.end
for.end: ; preds = %for.end.loopexit, %entry
ret void
}
; Verify that the "prefer-vector-width=256" attribute prevents the use of 512-bit
; vectors
; CHECK-LABEL: g:
; CHECK: vmovdqu %ymm{{.}},
; CHECK-NOT: %zmm
; CHECK-PREFER-AVX256-LABEL: g:
; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
; CHECK-PREFER-AVX256-NOT: %zmm
define void @g(i32* %a, i32 %n) "prefer-vector-width"="256" {
entry:
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body.preheader, label %for.end
for.body.preheader: ; preds = %entry
br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %n, i32* %arrayidx, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end.loopexit, label %for.body
for.end.loopexit: ; preds = %for.body
br label %for.end
for.end: ; preds = %for.end.loopexit, %entry
ret void
}
; Verify that the "prefer-vector-width=512" attribute override the subtarget
; vectors
; CHECK-LABEL: h:
; CHECK: vmovdqu64 %zmm{{.}},
; CHECK-NOT: %ymm
; CHECK: epilog
; CHECK: %ymm
; CHECK-PREFER-AVX256-LABEL: h:
; CHECK-PREFER-AVX256: vmovdqu64 %zmm{{.}},
; CHECK-PREFER-AVX256-NOT: %ymm
; CHECK-PREFER-AVX256: epilog
; CHECK-PREFER-AVX256: %ymm
define void @h(i32* %a, i32 %n) "prefer-vector-width"="512" {
entry:
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body.preheader, label %for.end
for.body.preheader: ; preds = %entry
br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %n, i32* %arrayidx, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end.loopexit, label %for.body
for.end.loopexit: ; preds = %for.body
br label %for.end
for.end: ; preds = %for.end.loopexit, %entry
ret void
}