For loops that contain in-loop reductions but no loads or stores, large VFs are chosen because LoopVectorizationCostModel::getSmallestAndWidestTypes has no element types to check and so returns the default widths (-1U for the smallest and 8 for the widest). This results in the widest VF being chosen for the following example: `float s = 0; for (int i = 0; i < N; ++i) s += (float) i*i;`. For more computationally intensive loops, this leads to large loop sizes when the operations end up being scalarized. In this patch, for the case where ElementTypesInLoop is empty, the widest type is determined by finding the smallest type used by recurrences in the loop instead of falling back to a default value of 8 bits. This results in the cost model choosing a more sensible VF for loops like the one above. Differential Revision: https://reviews.llvm.org/D113973
46 lines
1.6 KiB
LLVM
46 lines
1.6 KiB
LLVM
; Loop-vectorizer regression test: the lit line below pipes this file through
; opt's loop-vectorize pass and hands the textual IR to FileCheck.
; RUN: opt -S -loop-vectorize < %s | FileCheck %s
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i686-pc-windows-msvc18.0.0"

; test1: a counted loop placed inside an exception-handling catch funclet.
;
; The entry block invokes @_CxxThrowException; its normal destination is a
; block holding only `unreachable`, and its unwind destination is the
; catchswitch. The catch handler then runs a 1024-iteration loop whose only
; work is a call to @floor that carries a "funclet" operand bundle naming
; the enclosing catchpad token (%1).
define void @test1() #0 personality i32 (...)* @__CxxFrameHandler3 {
entry:
  invoke void @_CxxThrowException(i8* null, i8* null)
          to label %unreachable unwind label %catch.dispatch

catch.dispatch:                                   ; preds = %entry
  %0 = catchswitch within none [label %catch] unwind to caller

catch:                                            ; preds = %catch.dispatch
  %1 = catchpad within %0 [i8* null, i32 64, i8* null]
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ; Leave the funclet once the loop has finished.
  catchret from %1 to label %try.cont

for.body:                                         ; preds = %for.body, %catch
  ; Induction variable: starts at 0 and is incremented by 1 each iteration.
  %i.07 = phi i32 [ 0, %catch ], [ %inc, %for.body ]
  ; The call under test: note the funclet bundle tied to the catchpad token.
  %call = call double @floor(double 1.0) #1 [ "funclet"(token %1) ]
  %inc = add nuw nsw i32 %i.07, 1
  ; Exit once the incremented counter reaches 1024.
  %exitcond = icmp eq i32 %inc, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

try.cont:                                         ; preds = %for.cond.cleanup
  ret void

unreachable:                                      ; preds = %entry
  unreachable
}

; Expected output for @test1: the catchpad is captured as [[cpad]], and the
; loop's floor call must appear as an 8-wide vector intrinsic call that still
; carries a funclet bundle referring to that same catchpad.
; CHECK-LABEL: define void @test1(
; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [i8* null, i32 64, i8* null]
; CHECK: call <8 x double> @llvm.floor.v8f64(<8 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]

; External symbols referenced by @test1.

; Callee of the invoke in @test1's entry block.
declare x86_stdcallcc void @_CxxThrowException(i8*, i8*)

; Personality function named on @test1's definition.
declare i32 @__CxxFrameHandler3(...)

; Scalar callee of the loop body; declared with attribute group #1.
declare double @floor(double) #1

; #0 is attached to @test1 and enables the sse2 target feature.
attributes #0 = { "target-features"="+sse2" }
; #1 is attached to @floor's declaration and to its call site in @test1.
attributes #1 = { nounwind readnone }