Files
clang-p2996/llvm/test/Transforms/LoopVectorize/X86/funclet.ll
Rosie Sumpter 961f51fdf0 [LoopVectorize][CostModel] Choose smaller VFs for in-loop reductions without loads/stores
For loops that contain in-loop reductions but no loads or stores, large
VFs are chosen because LoopVectorizationCostModel::getSmallestAndWidestTypes
has no element types to check through and so returns the default widths
(-1U for the smallest and 8 for the widest). This results in the widest
VF being chosen for the following example,

float s = 0;
for (int i = 0; i < N; ++i)
  s += (float) i*i;

which, for more computationally intensive loops, leads to large loop
sizes when the operations end up being scalarized.

In this patch, for the case where ElementTypesInLoop is empty, the widest
type is determined by finding the smallest type used by recurrences in
the loop instead of falling back to a default value of 8 bits. This
results in the cost model choosing a more sensible VF for loops like
the one above.

Differential Revision: https://reviews.llvm.org/D113973
2022-01-04 10:12:57 +00:00

46 lines
1.6 KiB
LLVM

; Regression test: runs only the loop vectorizer over this module and verifies
; the textual IR output with FileCheck (patterns are listed after @test1).
; RUN: opt -S -loop-vectorize < %s | FileCheck %s
; Little-endian layout with 32-bit pointers, paired with a 32-bit x86
; Windows/MSVC triple; the function below uses the MSVC C++ EH personality.
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i686-pc-windows-msvc18.0.0"
; @test1: a counted loop that lives inside a catch funclet.
; The entry block throws via an invoke; the exception unwinds to a
; catchswitch/catchpad pair, and the loop body runs inside that catchpad.
; The call in the loop carries a "funclet" operand bundle naming the catchpad
; token, which is the piece of IR the test patterns after this function look
; for on the vectorized call.
define void @test1() #0 personality i32 (...)* @__CxxFrameHandler3 {
entry:
; Throwing call: the normal edge goes to %unreachable, the unwind edge to the
; catch dispatch block.
invoke void @_CxxThrowException(i8* null, i8* null)
to label %unreachable unwind label %catch.dispatch
catch.dispatch: ; preds = %entry
; Single handler (%catch); anything not handled unwinds to the caller.
%0 = catchswitch within none [label %catch] unwind to caller
catch: ; preds = %catch.dispatch
; %1 is the funclet token referenced by the call's operand bundle and by the
; catchret below.
%1 = catchpad within %0 [i8* null, i32 64, i8* null]
br label %for.body
for.cond.cleanup: ; preds = %for.body
; Leaves the catch funclet once the loop has finished.
catchret from %1 to label %try.cont
for.body: ; preds = %for.body, %catch
; Induction variable: starts at 0 and is incremented by 1 each iteration.
%i.07 = phi i32 [ 0, %catch ], [ %inc, %for.body ]
; Call with a constant argument whose result is unused; the "funclet" bundle
; ties the call to catchpad %1.
%call = call double @floor(double 1.0) #1 [ "funclet"(token %1) ]
%inc = add nuw nsw i32 %i.07, 1
; Exit once the incremented counter reaches 1024 (1024 iterations in total).
%exitcond = icmp eq i32 %inc, 1024
br i1 %exitcond, label %for.cond.cleanup, label %for.body
try.cont: ; preds = %for.cond.cleanup
ret void
unreachable: ; preds = %entry
; Reached only via the invoke's normal edge.
unreachable
}
; Expected output for @test1: the catchpad is still present (its result name is
; captured as 'cpad'), and the scalar floor call has been widened to the
; 8 x double llvm.floor intrinsic while still carrying a "funclet" operand
; bundle that refers to that same catchpad token.
; NOTE(review): the width of 8 presumably reflects the vectorization factor the
; cost model picks for this loop on the +sse2 target -- confirm if the cost
; model changes.
; CHECK-LABEL: define void @test1(
; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [i8* null, i32 64, i8* null]
; CHECK: call <8 x double> @llvm.floor.v8f64(<8 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]
; External symbols referenced by @test1: the throw routine invoked in the entry
; block, the EH personality function, and the scalar floor routine called in
; the loop body.
declare x86_stdcallcc void @_CxxThrowException(i8*, i8*)
declare i32 @__CxxFrameHandler3(...)
declare double @floor(double) #1
; #0 is applied to @test1 and enables SSE2 for it.
attributes #0 = { "target-features"="+sse2" }
; #1 is applied to @floor and its call site: the callee does not unwind and
; does not access memory.
attributes #1 = { nounwind readnone }