Files
clang-p2996/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
Florian Hahn 99d6c6d936 [VPlan] Model branch cond to enter scalar epilogue in VPlan. (#92651)
This patch moves branch condition creation to enter the scalar epilogue
loop to VPlan. Modeling the branch in the middle block also requires
modeling the successor blocks. This is done using the recently
introduced VPIRBasicBlock.

Note that the middle.block is still created as part of the skeleton and
then patched in during VPlan execution. Unfortunately the skeleton needs
to create the middle.block early on, as it is also used for induction
resume value creation and is also needed to properly update the
dominator tree during skeleton creation.

After this patch lands, I plan to move induction resume value and phi
node creation in the scalar preheader to VPlan. Once that is done, we
should be able to create the middle.block in VPlan directly.

This is a re-worked version based on the earlier
https://reviews.llvm.org/D150398 and the main change is the use of
VPIRBasicBlock.

Depends on https://github.com/llvm/llvm-project/pull/92525

PR: https://github.com/llvm/llvm-project/pull/92651
2024-07-05 10:08:42 +01:00

867 lines
51 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@c = common global [2048 x i32] zeroinitializer, align 16
@b = common global [2048 x i32] zeroinitializer, align 16
@d = common global [2048 x i32] zeroinitializer, align 16
@a = common global [2048 x i32] zeroinitializer, align 16
; The program below gathers and scatters data. We better not vectorize it.
define void @cost_model_1() nounwind uwtable noinline ssp {
; CHECK-LABEL: @cost_model_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 [[IDXPROM1]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2048 x i32], ptr @d, i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 [[IDXPROM5]]
; CHECK-NEXT: store i32 [[TMP2]], ptr [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = shl nsw i64 %indvars.iv, 1
%arrayidx = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 %0
%1 = load i32, ptr %arrayidx, align 8
%idxprom1 = sext i32 %1 to i64
%arrayidx2 = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 %idxprom1
%2 = load i32, ptr %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds [2048 x i32], ptr @d, i64 0, i64 %indvars.iv
%3 = load i32, ptr %arrayidx4, align 4
%idxprom5 = sext i32 %3 to i64
%arrayidx6 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %idxprom5
store i32 %2, ptr %arrayidx6, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 256
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}
; This function uses a stride that is generally too big to benefit from vectorization without
; really good support for a gather load. But if we don't vectorize the pointer induction,
; then we don't need to extract the pointers out of vector of pointers,
; and the vectorization becomes profitable.
define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) {
; CHECK-LABEL: @PR27826(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: preheader:
; CHECK-NEXT: [[T0:%.*]] = sext i32 [[N]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[T0]], -1
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 5
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 32
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP119:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP120:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP121:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP122:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 32
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 32
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 64
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 96
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 128
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 160
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 192
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 224
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 256
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 288
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 320
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 352
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 384
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 416
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], 448
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX]], 480
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]]
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP17]]
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP18]]
; CHECK-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP19]], align 4
; CHECK-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP20]], align 4
; CHECK-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP21]], align 4
; CHECK-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP22]], align 4
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x float> poison, float [[TMP35]], i32 0
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x float> [[TMP39]], float [[TMP36]], i32 1
; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP37]], i32 2
; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP38]], i32 3
; CHECK-NEXT: [[TMP43:%.*]] = load float, ptr [[TMP23]], align 4
; CHECK-NEXT: [[TMP44:%.*]] = load float, ptr [[TMP24]], align 4
; CHECK-NEXT: [[TMP45:%.*]] = load float, ptr [[TMP25]], align 4
; CHECK-NEXT: [[TMP46:%.*]] = load float, ptr [[TMP26]], align 4
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x float> poison, float [[TMP43]], i32 0
; CHECK-NEXT: [[TMP48:%.*]] = insertelement <4 x float> [[TMP47]], float [[TMP44]], i32 1
; CHECK-NEXT: [[TMP49:%.*]] = insertelement <4 x float> [[TMP48]], float [[TMP45]], i32 2
; CHECK-NEXT: [[TMP50:%.*]] = insertelement <4 x float> [[TMP49]], float [[TMP46]], i32 3
; CHECK-NEXT: [[TMP51:%.*]] = load float, ptr [[TMP27]], align 4
; CHECK-NEXT: [[TMP52:%.*]] = load float, ptr [[TMP28]], align 4
; CHECK-NEXT: [[TMP53:%.*]] = load float, ptr [[TMP29]], align 4
; CHECK-NEXT: [[TMP54:%.*]] = load float, ptr [[TMP30]], align 4
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x float> poison, float [[TMP51]], i32 0
; CHECK-NEXT: [[TMP56:%.*]] = insertelement <4 x float> [[TMP55]], float [[TMP52]], i32 1
; CHECK-NEXT: [[TMP57:%.*]] = insertelement <4 x float> [[TMP56]], float [[TMP53]], i32 2
; CHECK-NEXT: [[TMP58:%.*]] = insertelement <4 x float> [[TMP57]], float [[TMP54]], i32 3
; CHECK-NEXT: [[TMP59:%.*]] = load float, ptr [[TMP31]], align 4
; CHECK-NEXT: [[TMP60:%.*]] = load float, ptr [[TMP32]], align 4
; CHECK-NEXT: [[TMP61:%.*]] = load float, ptr [[TMP33]], align 4
; CHECK-NEXT: [[TMP62:%.*]] = load float, ptr [[TMP34]], align 4
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x float> poison, float [[TMP59]], i32 0
; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x float> [[TMP63]], float [[TMP60]], i32 1
; CHECK-NEXT: [[TMP65:%.*]] = insertelement <4 x float> [[TMP64]], float [[TMP61]], i32 2
; CHECK-NEXT: [[TMP66:%.*]] = insertelement <4 x float> [[TMP65]], float [[TMP62]], i32 3
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP14]]
; CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP17]]
; CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP18]]
; CHECK-NEXT: [[TMP83:%.*]] = load float, ptr [[TMP67]], align 4
; CHECK-NEXT: [[TMP84:%.*]] = load float, ptr [[TMP68]], align 4
; CHECK-NEXT: [[TMP85:%.*]] = load float, ptr [[TMP69]], align 4
; CHECK-NEXT: [[TMP86:%.*]] = load float, ptr [[TMP70]], align 4
; CHECK-NEXT: [[TMP87:%.*]] = insertelement <4 x float> poison, float [[TMP83]], i32 0
; CHECK-NEXT: [[TMP88:%.*]] = insertelement <4 x float> [[TMP87]], float [[TMP84]], i32 1
; CHECK-NEXT: [[TMP89:%.*]] = insertelement <4 x float> [[TMP88]], float [[TMP85]], i32 2
; CHECK-NEXT: [[TMP90:%.*]] = insertelement <4 x float> [[TMP89]], float [[TMP86]], i32 3
; CHECK-NEXT: [[TMP91:%.*]] = load float, ptr [[TMP71]], align 4
; CHECK-NEXT: [[TMP92:%.*]] = load float, ptr [[TMP72]], align 4
; CHECK-NEXT: [[TMP93:%.*]] = load float, ptr [[TMP73]], align 4
; CHECK-NEXT: [[TMP94:%.*]] = load float, ptr [[TMP74]], align 4
; CHECK-NEXT: [[TMP95:%.*]] = insertelement <4 x float> poison, float [[TMP91]], i32 0
; CHECK-NEXT: [[TMP96:%.*]] = insertelement <4 x float> [[TMP95]], float [[TMP92]], i32 1
; CHECK-NEXT: [[TMP97:%.*]] = insertelement <4 x float> [[TMP96]], float [[TMP93]], i32 2
; CHECK-NEXT: [[TMP98:%.*]] = insertelement <4 x float> [[TMP97]], float [[TMP94]], i32 3
; CHECK-NEXT: [[TMP99:%.*]] = load float, ptr [[TMP75]], align 4
; CHECK-NEXT: [[TMP100:%.*]] = load float, ptr [[TMP76]], align 4
; CHECK-NEXT: [[TMP101:%.*]] = load float, ptr [[TMP77]], align 4
; CHECK-NEXT: [[TMP102:%.*]] = load float, ptr [[TMP78]], align 4
; CHECK-NEXT: [[TMP103:%.*]] = insertelement <4 x float> poison, float [[TMP99]], i32 0
; CHECK-NEXT: [[TMP104:%.*]] = insertelement <4 x float> [[TMP103]], float [[TMP100]], i32 1
; CHECK-NEXT: [[TMP105:%.*]] = insertelement <4 x float> [[TMP104]], float [[TMP101]], i32 2
; CHECK-NEXT: [[TMP106:%.*]] = insertelement <4 x float> [[TMP105]], float [[TMP102]], i32 3
; CHECK-NEXT: [[TMP107:%.*]] = load float, ptr [[TMP79]], align 4
; CHECK-NEXT: [[TMP108:%.*]] = load float, ptr [[TMP80]], align 4
; CHECK-NEXT: [[TMP109:%.*]] = load float, ptr [[TMP81]], align 4
; CHECK-NEXT: [[TMP110:%.*]] = load float, ptr [[TMP82]], align 4
; CHECK-NEXT: [[TMP111:%.*]] = insertelement <4 x float> poison, float [[TMP107]], i32 0
; CHECK-NEXT: [[TMP112:%.*]] = insertelement <4 x float> [[TMP111]], float [[TMP108]], i32 1
; CHECK-NEXT: [[TMP113:%.*]] = insertelement <4 x float> [[TMP112]], float [[TMP109]], i32 2
; CHECK-NEXT: [[TMP114:%.*]] = insertelement <4 x float> [[TMP113]], float [[TMP110]], i32 3
; CHECK-NEXT: [[TMP115:%.*]] = fadd fast <4 x float> [[TMP42]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP116:%.*]] = fadd fast <4 x float> [[TMP50]], [[VEC_PHI1]]
; CHECK-NEXT: [[TMP117:%.*]] = fadd fast <4 x float> [[TMP58]], [[VEC_PHI2]]
; CHECK-NEXT: [[TMP118:%.*]] = fadd fast <4 x float> [[TMP66]], [[VEC_PHI3]]
; CHECK-NEXT: [[TMP119]] = fadd fast <4 x float> [[TMP115]], [[TMP90]]
; CHECK-NEXT: [[TMP120]] = fadd fast <4 x float> [[TMP116]], [[TMP98]]
; CHECK-NEXT: [[TMP121]] = fadd fast <4 x float> [[TMP117]], [[TMP106]]
; CHECK-NEXT: [[TMP122]] = fadd fast <4 x float> [[TMP118]], [[TMP114]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP123:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP123]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP120]], [[TMP119]]
; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <4 x float> [[TMP121]], [[BIN_RDX]]
; CHECK-NEXT: [[BIN_RDX5:%.*]] = fadd fast <4 x float> [[TMP122]], [[BIN_RDX4]]
; CHECK-NEXT: [[TMP124:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[BIN_RDX5]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP124]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[PREHEADER]] ]
; CHECK-NEXT: br label [[FOR:%.*]]
; CHECK: for:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR]] ]
; CHECK-NEXT: [[S_02:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD4:%.*]], [[FOR]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[T1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[T2:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[T1]], [[S_02]]
; CHECK-NEXT: [[ADD4]] = fadd fast float [[ADD]], [[T2]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 32
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[T0]]
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR]], label [[LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: loopexit:
; CHECK-NEXT: [[ADD4_LCSSA:%.*]] = phi float [ [[ADD4]], [[FOR]] ], [ [[TMP124]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD4_LCSSA]], [[LOOPEXIT]] ]
; CHECK-NEXT: ret float [[S_0_LCSSA]]
;
entry:
%cmp = icmp sgt i32 %n, 0
br i1 %cmp, label %preheader, label %for.end
preheader:
%t0 = sext i32 %n to i64
br label %for
for:
%indvars.iv = phi i64 [ 0, %preheader ], [ %indvars.iv.next, %for ]
%s.02 = phi float [ 0.0, %preheader ], [ %add4, %for ]
%arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
%t1 = load float, ptr %arrayidx, align 4
%arrayidx3 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
%t2 = load float, ptr %arrayidx3, align 4
%add = fadd fast float %t1, %s.02
%add4 = fadd fast float %add, %t2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 32
%cmp1 = icmp slt i64 %indvars.iv.next, %t0
br i1 %cmp1, label %for, label %loopexit
loopexit:
%add4.lcssa = phi float [ %add4, %for ]
br label %for.end
for.end:
%s.0.lcssa = phi float [ 0.0, %entry ], [ %add4.lcssa, %loopexit ]
ret float %s.0.lcssa
}
define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 {
; CHECK-LABEL: @multi_exit(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UMAX6:%.*]] = call i64 @llvm.umax.i64(i64 [[B:%.*]], i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX6]], -1
; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]]
; CHECK-NEXT: [[UMIN7:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[A:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[UMIN7]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 30
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[B]], i64 1)
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[UMAX]], -1
; CHECK-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]]
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP4]], i64 [[A]])
; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[UMIN]], 4294967295
; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[UMIN]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = add i32 1, [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 1
; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[UMIN]], 4294967295
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 1
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC_1:%.*]], i64 8
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC_2:%.*]], i64 8
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC_1]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC_2]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 4, i64 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP12]]
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[SRC_1]], align 8, !alias.scope [[META4:![0-9]+]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP13]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT9]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[SRC_2]], align 8, !alias.scope [[META7:![0-9]+]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <2 x i64> poison, i64 [[TMP14]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT13]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i64> [[BROADCAST_SPLAT10]], zeroinitializer
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq <2 x i64> [[BROADCAST_SPLAT10]], zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT14]], zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT14]], zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i1> [[TMP17]], [[TMP15]]
; CHECK-NEXT: [[TMP20:%.*]] = and <2 x i1> [[TMP18]], [[TMP16]]
; CHECK-NEXT: [[TMP21:%.*]] = zext <2 x i1> [[TMP19]] to <2 x i8>
; CHECK-NEXT: [[TMP22:%.*]] = zext <2 x i1> [[TMP20]] to <2 x i8>
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i8> [[TMP22]], i32 1
; CHECK-NEXT: store i8 [[TMP23]], ptr [[DST]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META11:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV_1_WIDE:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT_WIDE:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL8]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP_LATCH]] ]
; CHECK-NEXT: [[EC_1:%.*]] = icmp ult i64 [[IV_1_WIDE]], [[A]]
; CHECK-NEXT: br i1 [[EC_1]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
; CHECK: loop.latch:
; CHECK-NEXT: [[L_1:%.*]] = load i64, ptr [[SRC_1]], align 8
; CHECK-NEXT: [[L_2:%.*]] = load i64, ptr [[SRC_2]], align 8
; CHECK-NEXT: [[CMP55_US:%.*]] = icmp eq i64 [[L_1]], 0
; CHECK-NEXT: [[CMP_I_US:%.*]] = icmp ne i64 [[L_2]], 0
; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP_I_US]], [[CMP55_US]]
; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[AND]] to i8
; CHECK-NEXT: store i8 [[EXT]], ptr [[DST]], align 1
; CHECK-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1
; CHECK-NEXT: [[IV_1_NEXT_WIDE]] = zext i32 [[IV_1_NEXT]] to i64
; CHECK-NEXT: [[EC_2:%.*]] = icmp ult i64 [[IV_1_NEXT_WIDE]], [[B]]
; CHECK-NEXT: br i1 [[EC_2]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv.1.wide = phi i64 [ 0, %entry ], [ %iv.1.next.wide, %loop.latch ]
%iv.1 = phi i32 [ 0, %entry ], [ %iv.1.next, %loop.latch ]
%ec.1 = icmp ult i64 %iv.1.wide, %A
br i1 %ec.1, label %loop.latch, label %exit
loop.latch:
%l.1 = load i64, ptr %src.1, align 8
%l.2 = load i64, ptr %src.2, align 8
%cmp55.us = icmp eq i64 %l.1, 0
%cmp.i.us = icmp ne i64 %l.2, 0
%and = and i1 %cmp.i.us, %cmp55.us
%ext = zext i1 %and to i8
store i8 %ext, ptr %dst, align 1
%iv.1.next = add i32 %iv.1, 1
%iv.1.next.wide = zext i32 %iv.1.next to i64
%ec.2 = icmp ult i64 %iv.1.next.wide, %B
br i1 %ec.2, label %loop, label %exit
exit:
ret void
}
define i1 @any_of_cost(ptr %start, ptr %end) #0 {
; CHECK-LABEL: @any_of_cost(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[START2:%.*]] = ptrtoint ptr [[START:%.*]] to i64
; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
; CHECK-NEXT: [[TMP1:%.*]] = udiv i64 [[TMP0]], 40
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 4, i64 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[N_VEC]], 40
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 40
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 40
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 80
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 120
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]]
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP7]]
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]]
; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 8
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP4]], i64 8
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP5]], i64 8
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP6]], i64 8
; CHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP10]], align 8
; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP11]], align 8
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP14]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x ptr> [[TMP16]], ptr [[TMP15]], i32 1
; CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP12]], align 8
; CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP13]], align 8
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP18]], i32 0
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x ptr> [[TMP20]], ptr [[TMP19]], i32 1
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq <2 x ptr> [[TMP17]], zeroinitializer
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq <2 x ptr> [[TMP21]], zeroinitializer
; CHECK-NEXT: [[TMP24:%.*]] = xor <2 x i1> [[TMP22]], <i1 true, i1 true>
; CHECK-NEXT: [[TMP25:%.*]] = xor <2 x i1> [[TMP23]], <i1 true, i1 true>
; CHECK-NEXT: [[TMP26]] = or <2 x i1> [[VEC_PHI]], [[TMP24]]
; CHECK-NEXT: [[TMP27]] = or <2 x i1> [[VEC_PHI3]], [[TMP25]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = or <2 x i1> [[TMP27]], [[TMP26]]
; CHECK-NEXT: [[TMP29:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[BIN_RDX]])
; CHECK-NEXT: [[TMP30:%.*]] = freeze i1 [[TMP29]]
; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP30]], i1 false, i1 false
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[ANY_OF:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ANY_OF_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR_IV]], i64 8
; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[GEP]], align 8
; CHECK-NEXT: [[CMP13_NOT_NOT:%.*]] = icmp eq ptr [[L]], null
; CHECK-NEXT: [[ANY_OF_NEXT]] = select i1 [[CMP13_NOT_NOT]], i1 [[ANY_OF]], i1 false
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 40
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[PTR_IV]], [[END]]
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: [[ANY_OF_NEXT_LCSSA:%.*]] = phi i1 [ [[ANY_OF_NEXT]], [[LOOP]] ]
; CHECK-NEXT: ret i1 [[ANY_OF_NEXT_LCSSA]]
;
entry:
br label %loop
loop:
%any.of = phi i1 [ false, %entry ], [ %any.of.next, %loop ]
%ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop ]
%gep = getelementptr i8, ptr %ptr.iv, i64 8
%l = load ptr, ptr %gep, align 8
%cmp13.not.not = icmp eq ptr %l, null
%any.of.next = select i1 %cmp13.not.not, i1 %any.of, i1 false
%ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 40
%cmp.not = icmp eq ptr %ptr.iv, %end
br i1 %cmp.not, label %exit, label %loop
exit:
ret i1 %any.of.next
}
define i64 @avx512_cond_load_cost(ptr %src, i32 %a, i64 %b, i32 %c, i32 %d) #1 {
; CHECK-LABEL: @avx512_cond_load_cost(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: loop.header:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[C_1:%.*]] = icmp slt i32 [[IV]], 0
; CHECK-NEXT: br i1 [[C_1]], label [[IF_THEN:%.*]], label [[LOOP_LATCH]]
; CHECK: if.then:
; CHECK-NEXT: [[TMP0:%.*]] = urem i32 [[A:%.*]], [[C:%.*]]
; CHECK-NEXT: [[MUL:%.*]] = sub i32 0, [[TMP0]]
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[C]], [[D:%.*]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[DIV]], [[MUL]]
; CHECK-NEXT: [[EXT:%.*]] = sext i32 [[OR]] to i64
; CHECK-NEXT: [[GEP:%.*]] = getelementptr { i64, i64, i64 }, ptr [[SRC:%.*]], i64 [[EXT]], i32 2
; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8
; CHECK-NEXT: [[OR_2:%.*]] = or i64 [[L]], [[B:%.*]]
; CHECK-NEXT: br label [[LOOP_LATCH]]
; CHECK: loop.latch:
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ 0, [[LOOP_HEADER]] ], [ [[OR_2]], [[IF_THEN]] ]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp ult i32 [[IV]], [[C]]
; CHECK-NEXT: br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i64 [ [[RES]], [[LOOP_LATCH]] ]
; CHECK-NEXT: ret i64 [[RES_LCSSA]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
%c.1 = icmp slt i32 %iv, 0
br i1 %c.1, label %if.then, label %loop.latch
if.then:
%1 = urem i32 %a, %c
%mul = sub i32 0, %1
%div = udiv i32 %c, %d
%or = or i32 %div, %mul
%ext = sext i32 %or to i64
%gep = getelementptr { i64, i64, i64 }, ptr %src, i64 %ext, i32 2
%l = load i64, ptr %gep, align 8
%or.2 = or i64 %l, %b
br label %loop.latch
loop.latch:
%res = phi i64 [ 0, %loop.header ], [ %or.2, %if.then ]
%iv.next = add i32 %iv, 1
%ec = icmp ult i32 %iv, %c
br i1 %ec, label %loop.header, label %exit
exit:
ret i64 %res
}
define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-LABEL: @cost_duplicate_recipe_for_sinking(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP0]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 16, i64 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[TMP2]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE36:%.*]] ]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 12
; CHECK-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP3]], 2
; CHECK-NEXT: [[TMP8:%.*]] = shl nsw i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP9:%.*]] = shl nsw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP10:%.*]] = shl nsw i64 [[TMP6]], 2
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP11]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP12]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP13]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP14]], i32 0
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x double>, ptr [[TMP15]], align 8
; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <16 x double>, ptr [[TMP16]], align 8
; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <16 x double>, ptr [[TMP17]], align 8
; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load <16 x double>, ptr [[TMP18]], align 8
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x double> [[WIDE_VEC1]], <16 x double> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x double> [[WIDE_VEC2]], <16 x double> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <16 x double> [[WIDE_VEC3]], <16 x double> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-NEXT: [[TMP19:%.*]] = fcmp oeq <4 x double> [[STRIDED_VEC]], zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = fcmp oeq <4 x double> [[STRIDED_VEC4]], zeroinitializer
; CHECK-NEXT: [[TMP21:%.*]] = fcmp oeq <4 x double> [[STRIDED_VEC5]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = fcmp oeq <4 x double> [[STRIDED_VEC6]], zeroinitializer
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP19]], i32 0
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[TMP24:%.*]] = shl nsw i64 [[TMP3]], 2
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP24]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP25]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP19]], i32 1
; CHECK-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK: pred.store.if7:
; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP28:%.*]] = shl nsw i64 [[TMP27]], 2
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP28]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP29]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP19]], i32 2
; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
; CHECK: pred.store.if9:
; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP32:%.*]] = shl nsw i64 [[TMP31]], 2
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP32]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP33]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
; CHECK: pred.store.continue10:
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP19]], i32 3
; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
; CHECK: pred.store.if11:
; CHECK-NEXT: [[TMP35:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP36:%.*]] = shl nsw i64 [[TMP35]], 2
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP36]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP37]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
; CHECK: pred.store.continue12:
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0
; CHECK-NEXT: br i1 [[TMP38]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
; CHECK: pred.store.if13:
; CHECK-NEXT: [[TMP39:%.*]] = shl nsw i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP39]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP40]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
; CHECK: pred.store.continue14:
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i1> [[TMP20]], i32 1
; CHECK-NEXT: br i1 [[TMP41]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]]
; CHECK: pred.store.if15:
; CHECK-NEXT: [[TMP42:%.*]] = add i64 [[INDEX]], 5
; CHECK-NEXT: [[TMP43:%.*]] = shl nsw i64 [[TMP42]], 2
; CHECK-NEXT: [[TMP44:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP43]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP44]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
; CHECK: pred.store.continue16:
; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i1> [[TMP20]], i32 2
; CHECK-NEXT: br i1 [[TMP45]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
; CHECK: pred.store.if17:
; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[INDEX]], 6
; CHECK-NEXT: [[TMP47:%.*]] = shl nsw i64 [[TMP46]], 2
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP47]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP48]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]]
; CHECK: pred.store.continue18:
; CHECK-NEXT: [[TMP49:%.*]] = extractelement <4 x i1> [[TMP20]], i32 3
; CHECK-NEXT: br i1 [[TMP49]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
; CHECK: pred.store.if19:
; CHECK-NEXT: [[TMP50:%.*]] = add i64 [[INDEX]], 7
; CHECK-NEXT: [[TMP51:%.*]] = shl nsw i64 [[TMP50]], 2
; CHECK-NEXT: [[TMP52:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP51]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP52]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]]
; CHECK: pred.store.continue20:
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i1> [[TMP21]], i32 0
; CHECK-NEXT: br i1 [[TMP53]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]]
; CHECK: pred.store.if21:
; CHECK-NEXT: [[TMP54:%.*]] = shl nsw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP55:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP54]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP55]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]]
; CHECK: pred.store.continue22:
; CHECK-NEXT: [[TMP56:%.*]] = extractelement <4 x i1> [[TMP21]], i32 1
; CHECK-NEXT: br i1 [[TMP56]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]]
; CHECK: pred.store.if23:
; CHECK-NEXT: [[TMP57:%.*]] = add i64 [[INDEX]], 9
; CHECK-NEXT: [[TMP58:%.*]] = shl nsw i64 [[TMP57]], 2
; CHECK-NEXT: [[TMP59:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP58]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP59]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
; CHECK: pred.store.continue24:
; CHECK-NEXT: [[TMP60:%.*]] = extractelement <4 x i1> [[TMP21]], i32 2
; CHECK-NEXT: br i1 [[TMP60]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]]
; CHECK: pred.store.if25:
; CHECK-NEXT: [[TMP61:%.*]] = add i64 [[INDEX]], 10
; CHECK-NEXT: [[TMP62:%.*]] = shl nsw i64 [[TMP61]], 2
; CHECK-NEXT: [[TMP63:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP62]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP63]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]]
; CHECK: pred.store.continue26:
; CHECK-NEXT: [[TMP64:%.*]] = extractelement <4 x i1> [[TMP21]], i32 3
; CHECK-NEXT: br i1 [[TMP64]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]]
; CHECK: pred.store.if27:
; CHECK-NEXT: [[TMP65:%.*]] = add i64 [[INDEX]], 11
; CHECK-NEXT: [[TMP66:%.*]] = shl nsw i64 [[TMP65]], 2
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP66]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP67]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]]
; CHECK: pred.store.continue28:
; CHECK-NEXT: [[TMP68:%.*]] = extractelement <4 x i1> [[TMP22]], i32 0
; CHECK-NEXT: br i1 [[TMP68]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
; CHECK: pred.store.if29:
; CHECK-NEXT: [[TMP69:%.*]] = shl nsw i64 [[TMP6]], 2
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP69]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP70]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
; CHECK: pred.store.continue30:
; CHECK-NEXT: [[TMP71:%.*]] = extractelement <4 x i1> [[TMP22]], i32 1
; CHECK-NEXT: br i1 [[TMP71]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]]
; CHECK: pred.store.if31:
; CHECK-NEXT: [[TMP72:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT: [[TMP73:%.*]] = shl nsw i64 [[TMP72]], 2
; CHECK-NEXT: [[TMP74:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP73]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP74]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE32]]
; CHECK: pred.store.continue32:
; CHECK-NEXT: [[TMP75:%.*]] = extractelement <4 x i1> [[TMP22]], i32 2
; CHECK-NEXT: br i1 [[TMP75]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]]
; CHECK: pred.store.if33:
; CHECK-NEXT: [[TMP76:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP77:%.*]] = shl nsw i64 [[TMP76]], 2
; CHECK-NEXT: [[TMP78:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP77]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP78]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE34]]
; CHECK: pred.store.continue34:
; CHECK-NEXT: [[TMP79:%.*]] = extractelement <4 x i1> [[TMP22]], i32 3
; CHECK-NEXT: br i1 [[TMP79]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36]]
; CHECK: pred.store.if35:
; CHECK-NEXT: [[TMP80:%.*]] = add i64 [[INDEX]], 15
; CHECK-NEXT: [[TMP81:%.*]] = shl nsw i64 [[TMP80]], 2
; CHECK-NEXT: [[TMP82:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP81]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP82]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE36]]
; CHECK: pred.store.continue36:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP83:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP83]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: loop.header:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[IV_SHL:%.*]] = shl nsw i64 [[IV]], 2
; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[IV_SHL]]
; CHECK-NEXT: [[L:%.*]] = load double, ptr [[GEP_0]], align 8
; CHECK-NEXT: [[C:%.*]] = fcmp oeq double [[L]], 0.000000e+00
; CHECK-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[LOOP_LATCH]]
; CHECK: if.then:
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr double, ptr [[A]], i64 [[IV_SHL]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_1]], align 8
; CHECK-NEXT: br label [[LOOP_LATCH]]
; CHECK: loop.latch:
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
%iv.shl = shl nsw i64 %iv, 2
%gep.0 = getelementptr nusw double, ptr %A, i64 %iv.shl
%l = load double, ptr %gep.0, align 8
%c = fcmp oeq double %l, 0.000000e+00
br i1 %c, label %if.then, label %loop.latch
if.then:
%gep.1 = getelementptr double, ptr %A, i64 %iv.shl
store double 0.000000e+00, ptr %gep.1, align 8
br label %loop.latch
loop.latch:
%iv.next = add nsw i64 %iv, 1
%ec = icmp eq i64 %iv, %N
br i1 %ec, label %exit, label %loop.header
exit:
ret void
}
define i64 @cost_assume(ptr %end, i64 %N) {
; CHECK-LABEL: @cost_assume(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -9
; CHECK-NEXT: [[TMP1:%.*]] = udiv i64 [[TMP0]], 9
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[N:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3]] = add <2 x i64> [[VEC_PHI]], <i64 1, i64 1>
; CHECK-NEXT: [[TMP4]] = add <2 x i64> [[VEC_PHI2]], <i64 1, i64 1>
; CHECK-NEXT: [[TMP5]] = add <2 x i64> [[VEC_PHI3]], <i64 1, i64 1>
; CHECK-NEXT: [[TMP6]] = add <2 x i64> [[VEC_PHI4]], <i64 1, i64 1>
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]])
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]])
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP15]])
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP16]])
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP17]])
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP18]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP4]], [[TMP3]]
; CHECK-NEXT: [[BIN_RDX5:%.*]] = add <2 x i64> [[TMP5]], [[BIN_RDX]]
; CHECK-NEXT: [[BIN_RDX6:%.*]] = add <2 x i64> [[TMP6]], [[BIN_RDX5]]
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX6]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP21:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP22:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP22]] = add i64 [[TMP21]], 1
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
; CHECK-NEXT: [[C:%.*]] = icmp ne i64 [[N]], 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[C]])
; CHECK-NEXT: [[GEP:%.*]] = getelementptr nusw [9 x i8], ptr null, i64 [[IV_NEXT]]
; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[GEP]], [[END]]
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP22]], [[LOOP]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[DOTLCSSA]]
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%0 = phi i64 [ 0, %entry ], [ %1, %loop ]
%1 = add i64 %0, 1
%iv.next = add nsw i64 %iv, 1
%c = icmp ne i64 %N, 0
tail call void @llvm.assume(i1 %c)
%gep = getelementptr nusw [ 9 x i8 ], ptr null, i64 %iv.next
%ec = icmp eq ptr %gep, %end
br i1 %ec, label %exit, label %loop
exit:
ret i64 %1
}
declare void @llvm.assume(i1 noundef) #0
attributes #0 = { "target-cpu"="penryn" }
attributes #1 = { "target-features"="+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl" }
attributes #2 = { "target-cpu"="znver3" }