Invalid costs can be used to avoid vectorization with a given VF, which is used for scalable vectors to avoid things that the code-generator cannot handle. If we override the cost using the -force-target-instruction-cost option of the LV, we would override this mechanism, rendering the flag useless. This change ensures the cost is only overriden when the original cost that was calculated is valid. That allows the flag to be used in combination with the -scalable-vectorization option. Reviewed By: david-arm Differential Revision: https://reviews.llvm.org/D106677
251 lines
11 KiB
LLVM
251 lines
11 KiB
LLVM
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on \
|
|
; RUN: -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
|
|
; RUN: cat %t | FileCheck %s --check-prefix=CHECK-REMARKS
|
|
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-target-instruction-cost=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu \
|
|
; RUN: -scalable-vectorization=on -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
|
|
; RUN: cat %t | FileCheck %s --check-prefix=CHECK-REMARKS
|
|
|
|
define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {
|
|
; CHECK-LABEL: @vec_load
|
|
; CHECK: vector.body:
|
|
; CHECK: %[[LOAD:.*]] = load <vscale x 2 x double>, <vscale x 2 x double>*
|
|
; CHECK: call <vscale x 2 x double> @foo_vec(<vscale x 2 x double> %[[LOAD]])
|
|
entry:
|
|
%cmp7 = icmp sgt i64 %N, 0
|
|
br i1 %cmp7, label %for.body, label %for.end
|
|
|
|
for.body: ; preds = %for.body.preheader, %for.body
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%arrayidx = getelementptr inbounds double, double* %b, i64 %iv
|
|
%0 = load double, double* %arrayidx, align 8
|
|
%1 = call double @foo(double %0) #0
|
|
%add = fadd double %1, 1.000000e+00
|
|
%arrayidx2 = getelementptr inbounds double, double* %a, i64 %iv
|
|
store double %add, double* %arrayidx2, align 8
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond.not = icmp eq i64 %iv.next, %N
|
|
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1
|
|
|
|
for.end: ; preds = %for.body, %entry
|
|
ret void
|
|
}
|
|
|
|
define void @vec_scalar(i64 %N, double* nocapture %a) {
|
|
; CHECK-LABEL: @vec_scalar
|
|
; CHECK: vector.body:
|
|
; CHECK: call <vscale x 2 x double> @foo_vec(<vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 1.000000e+01, i32 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer))
|
|
entry:
|
|
%cmp7 = icmp sgt i64 %N, 0
|
|
br i1 %cmp7, label %for.body, label %for.end
|
|
|
|
for.body: ; preds = %for.body.preheader, %for.body
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%0 = call double @foo(double 10.0) #0
|
|
%sub = fsub double %0, 1.000000e+00
|
|
%arrayidx = getelementptr inbounds double, double* %a, i64 %iv
|
|
store double %sub, double* %arrayidx, align 8
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond.not = icmp eq i64 %iv.next, %N
|
|
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1
|
|
|
|
for.end: ; preds = %for.body, %entry
|
|
ret void
|
|
}
|
|
|
|
define void @vec_ptr(i64 %N, i64* noalias %a, i64** readnone %b) {
|
|
; CHECK-LABEL: @vec_ptr
|
|
; CHECK: vector.body:
|
|
; CHECK: %[[LOAD:.*]] = load <vscale x 2 x i64*>, <vscale x 2 x i64*>*
|
|
; CHECK: call <vscale x 2 x i64> @bar_vec(<vscale x 2 x i64*> %[[LOAD]])
|
|
entry:
|
|
%cmp7 = icmp sgt i64 %N, 0
|
|
br i1 %cmp7, label %for.body, label %for.end
|
|
|
|
for.body:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%gep = getelementptr i64*, i64** %b, i64 %iv
|
|
%load = load i64*, i64** %gep
|
|
%call = call i64 @bar(i64* %load) #1
|
|
%arrayidx = getelementptr inbounds i64, i64* %a, i64 %iv
|
|
store i64 %call, i64* %arrayidx
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond = icmp eq i64 %iv.next, 1024
|
|
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
|
|
|
|
for.end:
|
|
ret void
|
|
}
|
|
|
|
define void @vec_intrinsic(i64 %N, double* nocapture readonly %a) {
|
|
; CHECK-LABEL: @vec_intrinsic
|
|
; CHECK: vector.body:
|
|
; CHECK: %[[LOAD:.*]] = load <vscale x 2 x double>, <vscale x 2 x double>*
|
|
; CHECK: call fast <vscale x 2 x double> @sin_vec_nxv2f64(<vscale x 2 x double> %[[LOAD]])
|
|
entry:
|
|
%cmp7 = icmp sgt i64 %N, 0
|
|
br i1 %cmp7, label %for.body, label %for.end
|
|
|
|
for.body:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%arrayidx = getelementptr inbounds double, double* %a, i64 %iv
|
|
%0 = load double, double* %arrayidx, align 8
|
|
%1 = call fast double @llvm.sin.f64(double %0) #2
|
|
%add = fadd fast double %1, 1.000000e+00
|
|
store double %add, double* %arrayidx, align 8
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond = icmp eq i64 %iv.next, %N
|
|
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
|
|
|
|
for.end:
|
|
ret void
|
|
}
|
|
|
|
; CHECK-REMARKS: UserVF ignored because of invalid costs.
|
|
; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
|
|
; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
|
|
; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
|
|
define void @vec_sin_no_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
|
|
; CHECK: @vec_sin_no_mapping
|
|
; CHECK: call fast <2 x float> @llvm.sin.v2f32
|
|
; CHECK-NOT: <vscale x
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
|
|
%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
|
|
%0 = load float, float* %arrayidx, align 4, !dbg !11
|
|
%1 = tail call fast float @llvm.sin.f32(float %0), !dbg !12
|
|
%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
|
|
store float %1, float* %arrayidx1, align 4, !dbg !13
|
|
%inc = add nuw nsw i64 %i.07, 1
|
|
%exitcond.not = icmp eq i64 %inc, %n
|
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; CHECK-REMARKS: UserVF ignored because of invalid costs.
|
|
; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
|
|
; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
|
|
; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
|
|
; CHECK-REMARKS-NEXT: t.c:3:40: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
|
|
define void @vec_sin_no_mapping_ite(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
|
|
; CHECK: @vec_sin_no_mapping_ite
|
|
; CHECK-NOT: <vscale x
|
|
; CHECK: ret
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %if.end
|
|
%i.07 = phi i64 [ %inc, %if.end ], [ 0, %entry ]
|
|
%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
|
|
%0 = load float, float* %arrayidx, align 4, !dbg !11
|
|
%cmp = fcmp ugt float %0, 0.0000
|
|
br i1 %cmp, label %if.then, label %if.else
|
|
if.then:
|
|
%1 = tail call fast float @llvm.sin.f32(float %0), !dbg !12
|
|
br label %if.end
|
|
if.else:
|
|
%2 = tail call fast float @llvm.sin.f32(float 0.0), !dbg !13
|
|
br label %if.end
|
|
if.end:
|
|
%3 = phi float [%1, %if.then], [%2, %if.else]
|
|
%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
|
|
store float %3, float* %arrayidx1, align 4, !dbg !14
|
|
%inc = add nuw nsw i64 %i.07, 1
|
|
%exitcond.not = icmp eq i64 %inc, %n
|
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; CHECK-REMARKS: UserVF ignored because of invalid costs.
|
|
; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
|
|
; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
|
|
; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
|
|
define void @vec_sin_fixed_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
|
|
; CHECK: @vec_sin_fixed_mapping
|
|
; CHECK: call fast <2 x float> @llvm.sin.v2f32
|
|
; CHECK-NOT: <vscale x
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
|
|
%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
|
|
%0 = load float, float* %arrayidx, align 4, !dbg !11
|
|
%1 = tail call fast float @llvm.sin.f32(float %0) #3, !dbg !12
|
|
%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
|
|
store float %1, float* %arrayidx1, align 4, !dbg !13
|
|
%inc = add nuw nsw i64 %i.07, 1
|
|
%exitcond.not = icmp eq i64 %inc, %n
|
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Even though there are no function mappings attached to the call
|
|
; in the loop below we can still vectorize the loop because SVE has
|
|
; hardware support in the form of the 'fqsrt' instruction.
|
|
define void @vec_sqrt_no_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) #0 {
|
|
; CHECK: @vec_sqrt_no_mapping
|
|
; CHECK: call fast <vscale x 2 x float> @llvm.sqrt.nxv2f32
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
|
|
%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
|
|
%0 = load float, float* %arrayidx, align 4
|
|
%1 = tail call fast float @llvm.sqrt.f32(float %0)
|
|
%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
|
|
store float %1, float* %arrayidx1, align 4
|
|
%inc = add nuw nsw i64 %i.07, 1
|
|
%exitcond.not = icmp eq i64 %inc, %n
|
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
|
|
declare double @foo(double)
|
|
declare i64 @bar(i64*)
|
|
declare double @llvm.sin.f64(double)
|
|
declare float @llvm.sin.f32(float)
|
|
declare float @llvm.sqrt.f32(float)
|
|
|
|
declare <vscale x 2 x double> @foo_vec(<vscale x 2 x double>)
|
|
declare <vscale x 2 x i64> @bar_vec(<vscale x 2 x i64*>)
|
|
declare <vscale x 2 x double> @sin_vec_nxv2f64(<vscale x 2 x double>)
|
|
declare <2 x double> @sin_vec_v2f64(<2 x double>)
|
|
|
|
attributes #0 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_foo(foo_vec)" }
|
|
attributes #1 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_bar(bar_vec)" }
|
|
attributes #2 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_llvm.sin.f64(sin_vec_nxv2f64)" }
|
|
attributes #3 = { "vector-function-abi-variant"="_ZGV_LLVM_N2v_llvm.sin.f64(sin_vec_v2f64)" }
|
|
|
|
!1 = distinct !{!1, !2, !3}
|
|
!2 = !{!"llvm.loop.vectorize.width", i32 2}
|
|
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
|
|
|
|
!llvm.dbg.cu = !{!4}
|
|
!llvm.module.flags = !{!7}
|
|
!llvm.ident = !{!8}
|
|
|
|
!4 = distinct !DICompileUnit(language: DW_LANG_C99, file: !5, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !6, splitDebugInlining: false, nameTableKind: None)
|
|
!5 = !DIFile(filename: "t.c", directory: "somedir")
|
|
!6 = !{}
|
|
!7 = !{i32 2, !"Debug Info Version", i32 3}
|
|
!8 = !{!"clang"}
|
|
!9 = distinct !DISubprogram(name: "foo", scope: !5, file: !5, line: 2, type: !10, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
|
|
!10 = !DISubroutineType(types: !6)
|
|
!11 = !DILocation(line: 3, column: 10, scope: !9)
|
|
!12 = !DILocation(line: 3, column: 20, scope: !9)
|
|
!13 = !DILocation(line: 3, column: 30, scope: !9)
|
|
!14 = !DILocation(line: 3, column: 40, scope: !9)
|