[VPlan] Do not create VPWidenCall recipes for scalar vector factors.

'Widen' recipe are only used when actual vector values are generated.
Fix tryToWidenCall to do not create VPWidenCallRecipes for scalar vector
factors.

This was exposed by D123720, because the widened recipes are considered
vector users.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D124718
This commit is contained in:
Florian Hahn
2022-05-02 19:40:33 +01:00
parent e547a333a4
commit 0ef8ca6d88
3 changed files with 9 additions and 7 deletions

View File

@@ -8329,6 +8329,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
return nullptr;
auto willWiden = [&](ElementCount VF) -> bool {
if (VF.isScalar())
return false;
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
// The following case may be scalarized depending on the VF.
// The flag shows whether we use Intrinsic or a usual Call for vectorized

View File

@@ -1139,10 +1139,10 @@ define float @fmuladd_scalar_vf(float* %a, float* %b, i64 %n) {
; CHECK-UNORDERED: [[LOAD5:%.*]] = load float, float*
; CHECK-UNORDERED: [[LOAD6:%.*]] = load float, float*
; CHECK-UNORDERED: [[LOAD7:%.*]] = load float, float*
; CHECK-UNORDERED: [[FMULADD]] = call float @llvm.fmuladd.f32(float [[LOAD]], float [[LOAD4]], float [[VEC_PHI]])
; CHECK-UNORDERED: [[FMULADD1]] = call float @llvm.fmuladd.f32(float [[LOAD1]], float [[LOAD5]], float [[VEC_PHI1]])
; CHECK-UNORDERED: [[FMULADD2]] = call float @llvm.fmuladd.f32(float [[LOAD2]], float [[LOAD6]], float [[VEC_PHI2]])
; CHECK-UNORDERED: [[FMULADD3]] = call float @llvm.fmuladd.f32(float [[LOAD3]], float [[LOAD7]], float [[VEC_PHI3]])
; CHECK-UNORDERED: [[FMULADD]] = tail call float @llvm.fmuladd.f32(float [[LOAD]], float [[LOAD4]], float [[VEC_PHI]])
; CHECK-UNORDERED: [[FMULADD1]] = tail call float @llvm.fmuladd.f32(float [[LOAD1]], float [[LOAD5]], float [[VEC_PHI1]])
; CHECK-UNORDERED: [[FMULADD2]] = tail call float @llvm.fmuladd.f32(float [[LOAD2]], float [[LOAD6]], float [[VEC_PHI2]])
; CHECK-UNORDERED: [[FMULADD3]] = tail call float @llvm.fmuladd.f32(float [[LOAD3]], float [[LOAD7]], float [[VEC_PHI3]])
; CHECK-UNORDERED-NOT: llvm.vector.reduce.fadd
; CHECK-UNORDERED: middle.block:
; CHECK-UNORDERED: [[BIN_RDX:%.*]] = fadd float [[FMULADD1]], [[FMULADD]]

View File

@@ -12,7 +12,7 @@
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: vp<[[IV_STEPS:%.]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<%start>, ir<1>
; CHECK-NEXT: WIDEN-CALL ir<%min> = call @llvm.smin.i32(vp<[[IV_STEPS]]>, ir<65535>)
; CHECK-NEXT: CLONE ir<%min> = call vp<[[IV_STEPS]]>, ir<65535>, ir<@llvm.smin.i32>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%dst>, vp<[[IV_STEPS]]>
; CHECK-NEXT: CLONE store ir<%min>, ir<%arrayidx>
; CHECK-NEXT: EMIT vp<[[INC:%.+]]> = VF * UF +(nuw) vp<[[CAN_IV]]>
@@ -27,8 +27,8 @@ define void @test(i32 %start, ptr %dst) {
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 %start, [[INDEX]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[INDUCTION1:%.*]] = add i32 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[INDUCTION]], i32 65535)
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[INDUCTION1]], i32 65535)
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.smin.i32(i32 [[INDUCTION]], i32 65535)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[INDUCTION1]], i32 65535)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDUCTION]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDUCTION1]]
; CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 8