[LV] Use SCEV for subtraction of src/sink for diff runtime checks.

Instead of expanding the src/sink SCEV expressions and emitting an IR
sub to compute the difference, the subtraction can be performed
directly by ScalarEvolution. This allows the subtraction to be
simplified by SCEV, which in turn can reduce the number of redundant
runtime check instructions generated.

It also allows generating checks that are invariant w.r.t. an outer
loop, if the inner loop AddRecs have the same outer loop AddRec as start.
This commit is contained in:
Florian Hahn
2023-11-22 12:48:03 +00:00
parent eed17dcf76
commit 32d1197a8f
6 changed files with 184 additions and 313 deletions

View File

@@ -1793,9 +1793,9 @@ Value *llvm::addDiffRuntimeChecks(
auto *VFTimesUFTimesSize =
ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
ConstantInt::get(Ty, IC * C.AccessSize));
Value *Sink = Expander.expandCodeFor(C.SinkStart, Ty, Loc);
Value *Src = Expander.expandCodeFor(C.SrcStart, Ty, Loc);
Value *Diff = ChkBuilder.CreateSub(Sink, Src);
auto &SE = *Expander.getSE();
Value *Diff = Expander.expandCodeFor(
SE.getMinusSCEV(C.SinkStart, C.SrcStart), Ty, Loc);
Value *IsConflict =
ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
if (C.NeedsFreeze)

View File

@@ -117,10 +117,7 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 7
; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N]], 3
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], [[B1]]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP4]], [[A2]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[B1]], [[A2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP7]], [[TMP3]]
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:

View File

@@ -174,10 +174,8 @@ define void @load_clamped_index_offset_1(ptr %A, ptr %B, i32 %N) {
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP7:%.*]] = add nuw i64 [[B1]], 4
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[A2]], 4
; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP9]], 16
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[B1]], [[A2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP7]], 16
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4

View File

@@ -6,7 +6,6 @@
define void @test_large_number_of_group(ptr %dst, i64 %off, i64 %N) {
; CHECK-LABEL: @test_large_number_of_group(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DST1:%.*]] = ptrtoint ptr [[DST:%.*]] to i64
; CHECK-NEXT: [[OFF_MUL_2:%.*]] = shl i64 [[OFF:%.*]], 1
; CHECK-NEXT: [[OFF_MUL_3:%.*]] = mul i64 [[OFF]], 3
; CHECK-NEXT: [[OFF_MUL_4:%.*]] = shl i64 [[OFF]], 2
@@ -21,298 +20,179 @@ define void @test_large_number_of_group(ptr %dst, i64 %off, i64 %N) {
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[OFF]], 4
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[DST1]], [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], -40
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[DST1]], [[OFF_MUL_8]]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], -40
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], 32
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[OFF]], 24
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[DST1]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], -40
; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[TMP4]]
; CHECK-NEXT: [[DIFF_CHECK2:%.*]] = icmp ult i64 [[TMP9]], 32
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK2]]
; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[OFF]], 5
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[DST1]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], -40
; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP12]], [[TMP4]]
; CHECK-NEXT: [[DIFF_CHECK3:%.*]] = icmp ult i64 [[TMP13]], 32
; CHECK-NEXT: [[CONFLICT_RDX4:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK3]]
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[OFF]], 40
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[DST1]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[TMP15]], -40
; CHECK-NEXT: [[TMP17:%.*]] = sub i64 [[TMP16]], [[TMP4]]
; CHECK-NEXT: [[DIFF_CHECK5:%.*]] = icmp ult i64 [[TMP17]], 32
; CHECK-NEXT: [[CONFLICT_RDX6:%.*]] = or i1 [[CONFLICT_RDX4]], [[DIFF_CHECK5]]
; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[OFF]], 48
; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[DST1]], [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[TMP19]], -40
; CHECK-NEXT: [[TMP21:%.*]] = sub i64 [[TMP20]], [[TMP4]]
; CHECK-NEXT: [[DIFF_CHECK7:%.*]] = icmp ult i64 [[TMP21]], 32
; CHECK-NEXT: [[CONFLICT_RDX8:%.*]] = or i1 [[CONFLICT_RDX6]], [[DIFF_CHECK7]]
; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[OFF]], 56
; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[DST1]], [[TMP22]]
; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[TMP23]], -40
; CHECK-NEXT: [[TMP25:%.*]] = sub i64 [[TMP24]], [[TMP4]]
; CHECK-NEXT: [[DIFF_CHECK9:%.*]] = icmp ult i64 [[TMP25]], 32
; CHECK-NEXT: [[CONFLICT_RDX10:%.*]] = or i1 [[CONFLICT_RDX8]], [[DIFF_CHECK9]]
; CHECK-NEXT: [[TMP26:%.*]] = shl i64 [[OFF]], 6
; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[DST1]], [[TMP26]]
; CHECK-NEXT: [[TMP28:%.*]] = add i64 [[TMP27]], -40
; CHECK-NEXT: [[TMP29:%.*]] = sub i64 [[TMP28]], [[TMP4]]
; CHECK-NEXT: [[DIFF_CHECK11:%.*]] = icmp ult i64 [[TMP29]], 32
; CHECK-NEXT: [[CONFLICT_RDX12:%.*]] = or i1 [[CONFLICT_RDX10]], [[DIFF_CHECK11]]
; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[OFF]], 72
; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[DST1]], [[TMP30]]
; CHECK-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], -40
; CHECK-NEXT: [[TMP33:%.*]] = sub i64 [[TMP32]], [[TMP4]]
; CHECK-NEXT: [[DIFF_CHECK13:%.*]] = icmp ult i64 [[TMP33]], 32
; CHECK-NEXT: [[CONFLICT_RDX14:%.*]] = or i1 [[CONFLICT_RDX12]], [[DIFF_CHECK13]]
; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[OFF]], 80
; CHECK-NEXT: [[TMP35:%.*]] = add i64 [[DST1]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = add i64 [[TMP35]], -40
; CHECK-NEXT: [[TMP37:%.*]] = sub i64 [[TMP36]], [[TMP4]]
; CHECK-NEXT: [[DIFF_CHECK15:%.*]] = icmp ult i64 [[TMP37]], 32
; CHECK-NEXT: [[CONFLICT_RDX16:%.*]] = or i1 [[CONFLICT_RDX14]], [[DIFF_CHECK15]]
; CHECK-NEXT: [[TMP38:%.*]] = mul i64 [[OFF]], 88
; CHECK-NEXT: [[TMP39:%.*]] = add i64 [[DST1]], [[TMP38]]
; CHECK-NEXT: [[TMP40:%.*]] = add i64 [[TMP39]], -40
; CHECK-NEXT: [[TMP41:%.*]] = sub i64 [[TMP40]], [[TMP4]]
; CHECK-NEXT: [[DIFF_CHECK17:%.*]] = icmp ult i64 [[TMP41]], 32
; CHECK-NEXT: [[CONFLICT_RDX18:%.*]] = or i1 [[CONFLICT_RDX16]], [[DIFF_CHECK17]]
; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[OFF]], 96
; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[DST1]], [[TMP42]]
; CHECK-NEXT: [[TMP44:%.*]] = add i64 [[TMP43]], -40
; CHECK-NEXT: [[TMP45:%.*]] = sub i64 [[TMP44]], [[TMP4]]
; CHECK-NEXT: [[DIFF_CHECK19:%.*]] = icmp ult i64 [[TMP45]], 32
; CHECK-NEXT: [[CONFLICT_RDX20:%.*]] = or i1 [[CONFLICT_RDX18]], [[DIFF_CHECK19]]
; CHECK-NEXT: [[TMP46:%.*]] = sub i64 [[TMP8]], [[TMP2]]
; CHECK-NEXT: [[DIFF_CHECK21:%.*]] = icmp ult i64 [[TMP46]], 32
; CHECK-NEXT: [[CONFLICT_RDX22:%.*]] = or i1 [[CONFLICT_RDX20]], [[DIFF_CHECK21]]
; CHECK-NEXT: [[TMP47:%.*]] = sub i64 [[TMP12]], [[TMP2]]
; CHECK-NEXT: [[DIFF_CHECK23:%.*]] = icmp ult i64 [[TMP47]], 32
; CHECK-NEXT: [[CONFLICT_RDX24:%.*]] = or i1 [[CONFLICT_RDX22]], [[DIFF_CHECK23]]
; CHECK-NEXT: [[TMP48:%.*]] = sub i64 [[TMP16]], [[TMP2]]
; CHECK-NEXT: [[DIFF_CHECK25:%.*]] = icmp ult i64 [[TMP48]], 32
; CHECK-NEXT: [[CONFLICT_RDX26:%.*]] = or i1 [[CONFLICT_RDX24]], [[DIFF_CHECK25]]
; CHECK-NEXT: [[TMP49:%.*]] = sub i64 [[TMP20]], [[TMP2]]
; CHECK-NEXT: [[DIFF_CHECK27:%.*]] = icmp ult i64 [[TMP49]], 32
; CHECK-NEXT: [[CONFLICT_RDX28:%.*]] = or i1 [[CONFLICT_RDX26]], [[DIFF_CHECK27]]
; CHECK-NEXT: [[TMP50:%.*]] = sub i64 [[TMP24]], [[TMP2]]
; CHECK-NEXT: [[DIFF_CHECK29:%.*]] = icmp ult i64 [[TMP50]], 32
; CHECK-NEXT: [[CONFLICT_RDX30:%.*]] = or i1 [[CONFLICT_RDX28]], [[DIFF_CHECK29]]
; CHECK-NEXT: [[TMP51:%.*]] = sub i64 [[TMP28]], [[TMP2]]
; CHECK-NEXT: [[DIFF_CHECK31:%.*]] = icmp ult i64 [[TMP51]], 32
; CHECK-NEXT: [[CONFLICT_RDX32:%.*]] = or i1 [[CONFLICT_RDX30]], [[DIFF_CHECK31]]
; CHECK-NEXT: [[TMP52:%.*]] = sub i64 [[TMP32]], [[TMP2]]
; CHECK-NEXT: [[DIFF_CHECK33:%.*]] = icmp ult i64 [[TMP52]], 32
; CHECK-NEXT: [[CONFLICT_RDX34:%.*]] = or i1 [[CONFLICT_RDX32]], [[DIFF_CHECK33]]
; CHECK-NEXT: [[TMP53:%.*]] = sub i64 [[TMP36]], [[TMP2]]
; CHECK-NEXT: [[DIFF_CHECK35:%.*]] = icmp ult i64 [[TMP53]], 32
; CHECK-NEXT: [[CONFLICT_RDX36:%.*]] = or i1 [[CONFLICT_RDX34]], [[DIFF_CHECK35]]
; CHECK-NEXT: [[TMP54:%.*]] = sub i64 [[TMP40]], [[TMP2]]
; CHECK-NEXT: [[DIFF_CHECK37:%.*]] = icmp ult i64 [[TMP54]], 32
; CHECK-NEXT: [[CONFLICT_RDX38:%.*]] = or i1 [[CONFLICT_RDX36]], [[DIFF_CHECK37]]
; CHECK-NEXT: [[TMP55:%.*]] = sub i64 [[TMP44]], [[TMP2]]
; CHECK-NEXT: [[DIFF_CHECK39:%.*]] = icmp ult i64 [[TMP55]], 32
; CHECK-NEXT: [[CONFLICT_RDX40:%.*]] = or i1 [[CONFLICT_RDX38]], [[DIFF_CHECK39]]
; CHECK-NEXT: [[TMP56:%.*]] = sub i64 [[TMP12]], [[TMP8]]
; CHECK-NEXT: [[DIFF_CHECK41:%.*]] = icmp ult i64 [[TMP56]], 32
; CHECK-NEXT: [[CONFLICT_RDX42:%.*]] = or i1 [[CONFLICT_RDX40]], [[DIFF_CHECK41]]
; CHECK-NEXT: [[TMP57:%.*]] = sub i64 [[TMP16]], [[TMP8]]
; CHECK-NEXT: [[DIFF_CHECK43:%.*]] = icmp ult i64 [[TMP57]], 32
; CHECK-NEXT: [[CONFLICT_RDX44:%.*]] = or i1 [[CONFLICT_RDX42]], [[DIFF_CHECK43]]
; CHECK-NEXT: [[TMP58:%.*]] = sub i64 [[TMP20]], [[TMP8]]
; CHECK-NEXT: [[DIFF_CHECK45:%.*]] = icmp ult i64 [[TMP58]], 32
; CHECK-NEXT: [[CONFLICT_RDX46:%.*]] = or i1 [[CONFLICT_RDX44]], [[DIFF_CHECK45]]
; CHECK-NEXT: [[TMP59:%.*]] = sub i64 [[TMP24]], [[TMP8]]
; CHECK-NEXT: [[DIFF_CHECK47:%.*]] = icmp ult i64 [[TMP59]], 32
; CHECK-NEXT: [[CONFLICT_RDX48:%.*]] = or i1 [[CONFLICT_RDX46]], [[DIFF_CHECK47]]
; CHECK-NEXT: [[TMP60:%.*]] = sub i64 [[TMP28]], [[TMP8]]
; CHECK-NEXT: [[DIFF_CHECK49:%.*]] = icmp ult i64 [[TMP60]], 32
; CHECK-NEXT: [[CONFLICT_RDX50:%.*]] = or i1 [[CONFLICT_RDX48]], [[DIFF_CHECK49]]
; CHECK-NEXT: [[TMP61:%.*]] = sub i64 [[TMP32]], [[TMP8]]
; CHECK-NEXT: [[DIFF_CHECK51:%.*]] = icmp ult i64 [[TMP61]], 32
; CHECK-NEXT: [[CONFLICT_RDX52:%.*]] = or i1 [[CONFLICT_RDX50]], [[DIFF_CHECK51]]
; CHECK-NEXT: [[TMP62:%.*]] = sub i64 [[TMP36]], [[TMP8]]
; CHECK-NEXT: [[DIFF_CHECK53:%.*]] = icmp ult i64 [[TMP62]], 32
; CHECK-NEXT: [[CONFLICT_RDX54:%.*]] = or i1 [[CONFLICT_RDX52]], [[DIFF_CHECK53]]
; CHECK-NEXT: [[TMP63:%.*]] = sub i64 [[TMP40]], [[TMP8]]
; CHECK-NEXT: [[DIFF_CHECK55:%.*]] = icmp ult i64 [[TMP63]], 32
; CHECK-NEXT: [[CONFLICT_RDX56:%.*]] = or i1 [[CONFLICT_RDX54]], [[DIFF_CHECK55]]
; CHECK-NEXT: [[TMP64:%.*]] = sub i64 [[TMP44]], [[TMP8]]
; CHECK-NEXT: [[DIFF_CHECK57:%.*]] = icmp ult i64 [[TMP64]], 32
; CHECK-NEXT: [[CONFLICT_RDX58:%.*]] = or i1 [[CONFLICT_RDX56]], [[DIFF_CHECK57]]
; CHECK-NEXT: [[TMP65:%.*]] = sub i64 [[TMP16]], [[TMP12]]
; CHECK-NEXT: [[DIFF_CHECK59:%.*]] = icmp ult i64 [[TMP65]], 32
; CHECK-NEXT: [[CONFLICT_RDX60:%.*]] = or i1 [[CONFLICT_RDX58]], [[DIFF_CHECK59]]
; CHECK-NEXT: [[TMP66:%.*]] = sub i64 [[TMP20]], [[TMP12]]
; CHECK-NEXT: [[DIFF_CHECK61:%.*]] = icmp ult i64 [[TMP66]], 32
; CHECK-NEXT: [[CONFLICT_RDX62:%.*]] = or i1 [[CONFLICT_RDX60]], [[DIFF_CHECK61]]
; CHECK-NEXT: [[TMP67:%.*]] = sub i64 [[TMP24]], [[TMP12]]
; CHECK-NEXT: [[DIFF_CHECK63:%.*]] = icmp ult i64 [[TMP67]], 32
; CHECK-NEXT: [[CONFLICT_RDX64:%.*]] = or i1 [[CONFLICT_RDX62]], [[DIFF_CHECK63]]
; CHECK-NEXT: [[TMP68:%.*]] = sub i64 [[TMP28]], [[TMP12]]
; CHECK-NEXT: [[DIFF_CHECK65:%.*]] = icmp ult i64 [[TMP68]], 32
; CHECK-NEXT: [[CONFLICT_RDX66:%.*]] = or i1 [[CONFLICT_RDX64]], [[DIFF_CHECK65]]
; CHECK-NEXT: [[TMP69:%.*]] = sub i64 [[TMP32]], [[TMP12]]
; CHECK-NEXT: [[DIFF_CHECK67:%.*]] = icmp ult i64 [[TMP69]], 32
; CHECK-NEXT: [[CONFLICT_RDX68:%.*]] = or i1 [[CONFLICT_RDX66]], [[DIFF_CHECK67]]
; CHECK-NEXT: [[TMP70:%.*]] = sub i64 [[TMP36]], [[TMP12]]
; CHECK-NEXT: [[DIFF_CHECK69:%.*]] = icmp ult i64 [[TMP70]], 32
; CHECK-NEXT: [[CONFLICT_RDX70:%.*]] = or i1 [[CONFLICT_RDX68]], [[DIFF_CHECK69]]
; CHECK-NEXT: [[TMP71:%.*]] = sub i64 [[TMP40]], [[TMP12]]
; CHECK-NEXT: [[DIFF_CHECK71:%.*]] = icmp ult i64 [[TMP71]], 32
; CHECK-NEXT: [[CONFLICT_RDX72:%.*]] = or i1 [[CONFLICT_RDX70]], [[DIFF_CHECK71]]
; CHECK-NEXT: [[TMP72:%.*]] = sub i64 [[TMP44]], [[TMP12]]
; CHECK-NEXT: [[DIFF_CHECK73:%.*]] = icmp ult i64 [[TMP72]], 32
; CHECK-NEXT: [[CONFLICT_RDX74:%.*]] = or i1 [[CONFLICT_RDX72]], [[DIFF_CHECK73]]
; CHECK-NEXT: [[TMP73:%.*]] = sub i64 [[TMP20]], [[TMP16]]
; CHECK-NEXT: [[DIFF_CHECK75:%.*]] = icmp ult i64 [[TMP73]], 32
; CHECK-NEXT: [[CONFLICT_RDX76:%.*]] = or i1 [[CONFLICT_RDX74]], [[DIFF_CHECK75]]
; CHECK-NEXT: [[TMP74:%.*]] = sub i64 [[TMP24]], [[TMP16]]
; CHECK-NEXT: [[DIFF_CHECK77:%.*]] = icmp ult i64 [[TMP74]], 32
; CHECK-NEXT: [[CONFLICT_RDX78:%.*]] = or i1 [[CONFLICT_RDX76]], [[DIFF_CHECK77]]
; CHECK-NEXT: [[TMP75:%.*]] = sub i64 [[TMP28]], [[TMP16]]
; CHECK-NEXT: [[DIFF_CHECK79:%.*]] = icmp ult i64 [[TMP75]], 32
; CHECK-NEXT: [[CONFLICT_RDX80:%.*]] = or i1 [[CONFLICT_RDX78]], [[DIFF_CHECK79]]
; CHECK-NEXT: [[TMP76:%.*]] = sub i64 [[TMP32]], [[TMP16]]
; CHECK-NEXT: [[DIFF_CHECK81:%.*]] = icmp ult i64 [[TMP76]], 32
; CHECK-NEXT: [[CONFLICT_RDX82:%.*]] = or i1 [[CONFLICT_RDX80]], [[DIFF_CHECK81]]
; CHECK-NEXT: [[TMP77:%.*]] = sub i64 [[TMP36]], [[TMP16]]
; CHECK-NEXT: [[DIFF_CHECK83:%.*]] = icmp ult i64 [[TMP77]], 32
; CHECK-NEXT: [[CONFLICT_RDX84:%.*]] = or i1 [[CONFLICT_RDX82]], [[DIFF_CHECK83]]
; CHECK-NEXT: [[TMP78:%.*]] = sub i64 [[TMP40]], [[TMP16]]
; CHECK-NEXT: [[DIFF_CHECK85:%.*]] = icmp ult i64 [[TMP78]], 32
; CHECK-NEXT: [[CONFLICT_RDX86:%.*]] = or i1 [[CONFLICT_RDX84]], [[DIFF_CHECK85]]
; CHECK-NEXT: [[TMP79:%.*]] = sub i64 [[TMP44]], [[TMP16]]
; CHECK-NEXT: [[DIFF_CHECK87:%.*]] = icmp ult i64 [[TMP79]], 32
; CHECK-NEXT: [[CONFLICT_RDX88:%.*]] = or i1 [[CONFLICT_RDX86]], [[DIFF_CHECK87]]
; CHECK-NEXT: [[TMP80:%.*]] = sub i64 [[TMP24]], [[TMP20]]
; CHECK-NEXT: [[DIFF_CHECK89:%.*]] = icmp ult i64 [[TMP80]], 32
; CHECK-NEXT: [[CONFLICT_RDX90:%.*]] = or i1 [[CONFLICT_RDX88]], [[DIFF_CHECK89]]
; CHECK-NEXT: [[TMP81:%.*]] = sub i64 [[TMP28]], [[TMP20]]
; CHECK-NEXT: [[DIFF_CHECK91:%.*]] = icmp ult i64 [[TMP81]], 32
; CHECK-NEXT: [[CONFLICT_RDX92:%.*]] = or i1 [[CONFLICT_RDX90]], [[DIFF_CHECK91]]
; CHECK-NEXT: [[TMP82:%.*]] = sub i64 [[TMP32]], [[TMP20]]
; CHECK-NEXT: [[DIFF_CHECK93:%.*]] = icmp ult i64 [[TMP82]], 32
; CHECK-NEXT: [[CONFLICT_RDX94:%.*]] = or i1 [[CONFLICT_RDX92]], [[DIFF_CHECK93]]
; CHECK-NEXT: [[TMP83:%.*]] = sub i64 [[TMP36]], [[TMP20]]
; CHECK-NEXT: [[DIFF_CHECK95:%.*]] = icmp ult i64 [[TMP83]], 32
; CHECK-NEXT: [[CONFLICT_RDX96:%.*]] = or i1 [[CONFLICT_RDX94]], [[DIFF_CHECK95]]
; CHECK-NEXT: [[TMP84:%.*]] = sub i64 [[TMP40]], [[TMP20]]
; CHECK-NEXT: [[DIFF_CHECK97:%.*]] = icmp ult i64 [[TMP84]], 32
; CHECK-NEXT: [[CONFLICT_RDX98:%.*]] = or i1 [[CONFLICT_RDX96]], [[DIFF_CHECK97]]
; CHECK-NEXT: [[TMP85:%.*]] = sub i64 [[TMP44]], [[TMP20]]
; CHECK-NEXT: [[DIFF_CHECK99:%.*]] = icmp ult i64 [[TMP85]], 32
; CHECK-NEXT: [[CONFLICT_RDX100:%.*]] = or i1 [[CONFLICT_RDX98]], [[DIFF_CHECK99]]
; CHECK-NEXT: [[TMP86:%.*]] = sub i64 [[TMP28]], [[TMP24]]
; CHECK-NEXT: [[DIFF_CHECK101:%.*]] = icmp ult i64 [[TMP86]], 32
; CHECK-NEXT: [[CONFLICT_RDX102:%.*]] = or i1 [[CONFLICT_RDX100]], [[DIFF_CHECK101]]
; CHECK-NEXT: [[TMP87:%.*]] = sub i64 [[TMP32]], [[TMP24]]
; CHECK-NEXT: [[DIFF_CHECK103:%.*]] = icmp ult i64 [[TMP87]], 32
; CHECK-NEXT: [[CONFLICT_RDX104:%.*]] = or i1 [[CONFLICT_RDX102]], [[DIFF_CHECK103]]
; CHECK-NEXT: [[TMP88:%.*]] = sub i64 [[TMP36]], [[TMP24]]
; CHECK-NEXT: [[DIFF_CHECK105:%.*]] = icmp ult i64 [[TMP88]], 32
; CHECK-NEXT: [[CONFLICT_RDX106:%.*]] = or i1 [[CONFLICT_RDX104]], [[DIFF_CHECK105]]
; CHECK-NEXT: [[TMP89:%.*]] = sub i64 [[TMP40]], [[TMP24]]
; CHECK-NEXT: [[DIFF_CHECK107:%.*]] = icmp ult i64 [[TMP89]], 32
; CHECK-NEXT: [[CONFLICT_RDX108:%.*]] = or i1 [[CONFLICT_RDX106]], [[DIFF_CHECK107]]
; CHECK-NEXT: [[TMP90:%.*]] = sub i64 [[TMP44]], [[TMP24]]
; CHECK-NEXT: [[DIFF_CHECK109:%.*]] = icmp ult i64 [[TMP90]], 32
; CHECK-NEXT: [[CONFLICT_RDX110:%.*]] = or i1 [[CONFLICT_RDX108]], [[DIFF_CHECK109]]
; CHECK-NEXT: [[TMP91:%.*]] = sub i64 [[TMP32]], [[TMP28]]
; CHECK-NEXT: [[DIFF_CHECK111:%.*]] = icmp ult i64 [[TMP91]], 32
; CHECK-NEXT: [[CONFLICT_RDX112:%.*]] = or i1 [[CONFLICT_RDX110]], [[DIFF_CHECK111]]
; CHECK-NEXT: [[TMP92:%.*]] = sub i64 [[TMP36]], [[TMP28]]
; CHECK-NEXT: [[DIFF_CHECK113:%.*]] = icmp ult i64 [[TMP92]], 32
; CHECK-NEXT: [[CONFLICT_RDX114:%.*]] = or i1 [[CONFLICT_RDX112]], [[DIFF_CHECK113]]
; CHECK-NEXT: [[TMP93:%.*]] = sub i64 [[TMP40]], [[TMP28]]
; CHECK-NEXT: [[DIFF_CHECK115:%.*]] = icmp ult i64 [[TMP93]], 32
; CHECK-NEXT: [[CONFLICT_RDX116:%.*]] = or i1 [[CONFLICT_RDX114]], [[DIFF_CHECK115]]
; CHECK-NEXT: [[TMP94:%.*]] = sub i64 [[TMP44]], [[TMP28]]
; CHECK-NEXT: [[DIFF_CHECK117:%.*]] = icmp ult i64 [[TMP94]], 32
; CHECK-NEXT: [[CONFLICT_RDX118:%.*]] = or i1 [[CONFLICT_RDX116]], [[DIFF_CHECK117]]
; CHECK-NEXT: [[TMP95:%.*]] = sub i64 [[TMP36]], [[TMP32]]
; CHECK-NEXT: [[DIFF_CHECK119:%.*]] = icmp ult i64 [[TMP95]], 32
; CHECK-NEXT: [[CONFLICT_RDX120:%.*]] = or i1 [[CONFLICT_RDX118]], [[DIFF_CHECK119]]
; CHECK-NEXT: [[TMP96:%.*]] = sub i64 [[TMP40]], [[TMP32]]
; CHECK-NEXT: [[DIFF_CHECK121:%.*]] = icmp ult i64 [[TMP96]], 32
; CHECK-NEXT: [[CONFLICT_RDX122:%.*]] = or i1 [[CONFLICT_RDX120]], [[DIFF_CHECK121]]
; CHECK-NEXT: [[TMP97:%.*]] = sub i64 [[TMP44]], [[TMP32]]
; CHECK-NEXT: [[DIFF_CHECK123:%.*]] = icmp ult i64 [[TMP97]], 32
; CHECK-NEXT: [[CONFLICT_RDX124:%.*]] = or i1 [[CONFLICT_RDX122]], [[DIFF_CHECK123]]
; CHECK-NEXT: [[TMP98:%.*]] = sub i64 [[TMP40]], [[TMP36]]
; CHECK-NEXT: [[DIFF_CHECK125:%.*]] = icmp ult i64 [[TMP98]], 32
; CHECK-NEXT: [[CONFLICT_RDX126:%.*]] = or i1 [[CONFLICT_RDX124]], [[DIFF_CHECK125]]
; CHECK-NEXT: [[TMP99:%.*]] = sub i64 [[TMP44]], [[TMP36]]
; CHECK-NEXT: [[DIFF_CHECK127:%.*]] = icmp ult i64 [[TMP99]], 32
; CHECK-NEXT: [[CONFLICT_RDX128:%.*]] = or i1 [[CONFLICT_RDX126]], [[DIFF_CHECK127]]
; CHECK-NEXT: [[TMP100:%.*]] = sub i64 [[TMP44]], [[TMP40]]
; CHECK-NEXT: [[DIFF_CHECK129:%.*]] = icmp ult i64 [[TMP100]], 32
; CHECK-NEXT: [[CONFLICT_RDX130:%.*]] = or i1 [[CONFLICT_RDX128]], [[DIFF_CHECK129]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX130]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: [[DIFF_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK1]]
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[OFF]], 24
; CHECK-NEXT: [[DIFF_CHECK2:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-NEXT: [[CONFLICT_RDX3:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK2]]
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[OFF]], 5
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP2]], 32
; CHECK-NEXT: [[CONFLICT_RDX5:%.*]] = or i1 [[CONFLICT_RDX3]], [[DIFF_CHECK4]]
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[OFF]], 40
; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ult i64 [[TMP3]], 32
; CHECK-NEXT: [[CONFLICT_RDX7:%.*]] = or i1 [[CONFLICT_RDX5]], [[DIFF_CHECK6]]
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[OFF]], 48
; CHECK-NEXT: [[DIFF_CHECK8:%.*]] = icmp ult i64 [[TMP4]], 32
; CHECK-NEXT: [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX7]], [[DIFF_CHECK8]]
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[OFF]], 56
; CHECK-NEXT: [[DIFF_CHECK10:%.*]] = icmp ult i64 [[TMP5]], 32
; CHECK-NEXT: [[CONFLICT_RDX11:%.*]] = or i1 [[CONFLICT_RDX9]], [[DIFF_CHECK10]]
; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[OFF]], 6
; CHECK-NEXT: [[DIFF_CHECK12:%.*]] = icmp ult i64 [[TMP6]], 32
; CHECK-NEXT: [[CONFLICT_RDX13:%.*]] = or i1 [[CONFLICT_RDX11]], [[DIFF_CHECK12]]
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[OFF]], 72
; CHECK-NEXT: [[DIFF_CHECK14:%.*]] = icmp ult i64 [[TMP7]], 32
; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX13]], [[DIFF_CHECK14]]
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[OFF]], 80
; CHECK-NEXT: [[DIFF_CHECK16:%.*]] = icmp ult i64 [[TMP8]], 32
; CHECK-NEXT: [[CONFLICT_RDX17:%.*]] = or i1 [[CONFLICT_RDX15]], [[DIFF_CHECK16]]
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[OFF]], 88
; CHECK-NEXT: [[DIFF_CHECK18:%.*]] = icmp ult i64 [[TMP9]], 32
; CHECK-NEXT: [[CONFLICT_RDX19:%.*]] = or i1 [[CONFLICT_RDX17]], [[DIFF_CHECK18]]
; CHECK-NEXT: [[DIFF_CHECK20:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
; CHECK-NEXT: [[CONFLICT_RDX21:%.*]] = or i1 [[CONFLICT_RDX19]], [[DIFF_CHECK20]]
; CHECK-NEXT: [[DIFF_CHECK22:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[CONFLICT_RDX23:%.*]] = or i1 [[CONFLICT_RDX21]], [[DIFF_CHECK22]]
; CHECK-NEXT: [[DIFF_CHECK24:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-NEXT: [[CONFLICT_RDX25:%.*]] = or i1 [[CONFLICT_RDX23]], [[DIFF_CHECK24]]
; CHECK-NEXT: [[DIFF_CHECK26:%.*]] = icmp ult i64 [[TMP2]], 32
; CHECK-NEXT: [[CONFLICT_RDX27:%.*]] = or i1 [[CONFLICT_RDX25]], [[DIFF_CHECK26]]
; CHECK-NEXT: [[DIFF_CHECK28:%.*]] = icmp ult i64 [[TMP3]], 32
; CHECK-NEXT: [[CONFLICT_RDX29:%.*]] = or i1 [[CONFLICT_RDX27]], [[DIFF_CHECK28]]
; CHECK-NEXT: [[DIFF_CHECK30:%.*]] = icmp ult i64 [[TMP4]], 32
; CHECK-NEXT: [[CONFLICT_RDX31:%.*]] = or i1 [[CONFLICT_RDX29]], [[DIFF_CHECK30]]
; CHECK-NEXT: [[DIFF_CHECK32:%.*]] = icmp ult i64 [[TMP5]], 32
; CHECK-NEXT: [[CONFLICT_RDX33:%.*]] = or i1 [[CONFLICT_RDX31]], [[DIFF_CHECK32]]
; CHECK-NEXT: [[DIFF_CHECK34:%.*]] = icmp ult i64 [[TMP6]], 32
; CHECK-NEXT: [[CONFLICT_RDX35:%.*]] = or i1 [[CONFLICT_RDX33]], [[DIFF_CHECK34]]
; CHECK-NEXT: [[DIFF_CHECK36:%.*]] = icmp ult i64 [[TMP7]], 32
; CHECK-NEXT: [[CONFLICT_RDX37:%.*]] = or i1 [[CONFLICT_RDX35]], [[DIFF_CHECK36]]
; CHECK-NEXT: [[DIFF_CHECK38:%.*]] = icmp ult i64 [[TMP8]], 32
; CHECK-NEXT: [[CONFLICT_RDX39:%.*]] = or i1 [[CONFLICT_RDX37]], [[DIFF_CHECK38]]
; CHECK-NEXT: [[DIFF_CHECK40:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
; CHECK-NEXT: [[CONFLICT_RDX41:%.*]] = or i1 [[CONFLICT_RDX39]], [[DIFF_CHECK40]]
; CHECK-NEXT: [[DIFF_CHECK42:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[CONFLICT_RDX43:%.*]] = or i1 [[CONFLICT_RDX41]], [[DIFF_CHECK42]]
; CHECK-NEXT: [[DIFF_CHECK44:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-NEXT: [[CONFLICT_RDX45:%.*]] = or i1 [[CONFLICT_RDX43]], [[DIFF_CHECK44]]
; CHECK-NEXT: [[DIFF_CHECK46:%.*]] = icmp ult i64 [[TMP2]], 32
; CHECK-NEXT: [[CONFLICT_RDX47:%.*]] = or i1 [[CONFLICT_RDX45]], [[DIFF_CHECK46]]
; CHECK-NEXT: [[DIFF_CHECK48:%.*]] = icmp ult i64 [[TMP3]], 32
; CHECK-NEXT: [[CONFLICT_RDX49:%.*]] = or i1 [[CONFLICT_RDX47]], [[DIFF_CHECK48]]
; CHECK-NEXT: [[DIFF_CHECK50:%.*]] = icmp ult i64 [[TMP4]], 32
; CHECK-NEXT: [[CONFLICT_RDX51:%.*]] = or i1 [[CONFLICT_RDX49]], [[DIFF_CHECK50]]
; CHECK-NEXT: [[DIFF_CHECK52:%.*]] = icmp ult i64 [[TMP5]], 32
; CHECK-NEXT: [[CONFLICT_RDX53:%.*]] = or i1 [[CONFLICT_RDX51]], [[DIFF_CHECK52]]
; CHECK-NEXT: [[DIFF_CHECK54:%.*]] = icmp ult i64 [[TMP6]], 32
; CHECK-NEXT: [[CONFLICT_RDX55:%.*]] = or i1 [[CONFLICT_RDX53]], [[DIFF_CHECK54]]
; CHECK-NEXT: [[DIFF_CHECK56:%.*]] = icmp ult i64 [[TMP7]], 32
; CHECK-NEXT: [[CONFLICT_RDX57:%.*]] = or i1 [[CONFLICT_RDX55]], [[DIFF_CHECK56]]
; CHECK-NEXT: [[DIFF_CHECK58:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
; CHECK-NEXT: [[CONFLICT_RDX59:%.*]] = or i1 [[CONFLICT_RDX57]], [[DIFF_CHECK58]]
; CHECK-NEXT: [[DIFF_CHECK60:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[CONFLICT_RDX61:%.*]] = or i1 [[CONFLICT_RDX59]], [[DIFF_CHECK60]]
; CHECK-NEXT: [[DIFF_CHECK62:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-NEXT: [[CONFLICT_RDX63:%.*]] = or i1 [[CONFLICT_RDX61]], [[DIFF_CHECK62]]
; CHECK-NEXT: [[DIFF_CHECK64:%.*]] = icmp ult i64 [[TMP2]], 32
; CHECK-NEXT: [[CONFLICT_RDX65:%.*]] = or i1 [[CONFLICT_RDX63]], [[DIFF_CHECK64]]
; CHECK-NEXT: [[DIFF_CHECK66:%.*]] = icmp ult i64 [[TMP3]], 32
; CHECK-NEXT: [[CONFLICT_RDX67:%.*]] = or i1 [[CONFLICT_RDX65]], [[DIFF_CHECK66]]
; CHECK-NEXT: [[DIFF_CHECK68:%.*]] = icmp ult i64 [[TMP4]], 32
; CHECK-NEXT: [[CONFLICT_RDX69:%.*]] = or i1 [[CONFLICT_RDX67]], [[DIFF_CHECK68]]
; CHECK-NEXT: [[DIFF_CHECK70:%.*]] = icmp ult i64 [[TMP5]], 32
; CHECK-NEXT: [[CONFLICT_RDX71:%.*]] = or i1 [[CONFLICT_RDX69]], [[DIFF_CHECK70]]
; CHECK-NEXT: [[DIFF_CHECK72:%.*]] = icmp ult i64 [[TMP6]], 32
; CHECK-NEXT: [[CONFLICT_RDX73:%.*]] = or i1 [[CONFLICT_RDX71]], [[DIFF_CHECK72]]
; CHECK-NEXT: [[DIFF_CHECK74:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
; CHECK-NEXT: [[DIFF_CHECK75:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[DIFF_CHECK76:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-NEXT: [[DIFF_CHECK77:%.*]] = icmp ult i64 [[TMP2]], 32
; CHECK-NEXT: [[DIFF_CHECK78:%.*]] = icmp ult i64 [[TMP3]], 32
; CHECK-NEXT: [[DIFF_CHECK79:%.*]] = icmp ult i64 [[TMP4]], 32
; CHECK-NEXT: [[DIFF_CHECK80:%.*]] = icmp ult i64 [[TMP5]], 32
; CHECK-NEXT: [[DIFF_CHECK81:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
; CHECK-NEXT: [[DIFF_CHECK82:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[DIFF_CHECK83:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-NEXT: [[DIFF_CHECK84:%.*]] = icmp ult i64 [[TMP2]], 32
; CHECK-NEXT: [[DIFF_CHECK85:%.*]] = icmp ult i64 [[TMP3]], 32
; CHECK-NEXT: [[DIFF_CHECK86:%.*]] = icmp ult i64 [[TMP4]], 32
; CHECK-NEXT: [[DIFF_CHECK87:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
; CHECK-NEXT: [[DIFF_CHECK88:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[DIFF_CHECK89:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-NEXT: [[DIFF_CHECK90:%.*]] = icmp ult i64 [[TMP2]], 32
; CHECK-NEXT: [[DIFF_CHECK91:%.*]] = icmp ult i64 [[TMP3]], 32
; CHECK-NEXT: [[DIFF_CHECK92:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
; CHECK-NEXT: [[DIFF_CHECK93:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[DIFF_CHECK94:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-NEXT: [[DIFF_CHECK95:%.*]] = icmp ult i64 [[TMP2]], 32
; CHECK-NEXT: [[DIFF_CHECK96:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
; CHECK-NEXT: [[DIFF_CHECK97:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[DIFF_CHECK98:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-NEXT: [[DIFF_CHECK99:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
; CHECK-NEXT: [[DIFF_CHECK100:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[DIFF_CHECK101:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
; CHECK-NEXT: br i1 [[CONFLICT_RDX73]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP101:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP102:%.*]] = add nsw i64 [[TMP101]], -5
; CHECK-NEXT: [[TMP103:%.*]] = add i64 [[TMP102]], [[OFF]]
; CHECK-NEXT: [[TMP104:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP103]]
; CHECK-NEXT: [[TMP105:%.*]] = getelementptr double, ptr [[TMP104]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP105]], align 8
; CHECK-NEXT: [[TMP106:%.*]] = add i64 [[TMP102]], [[OFF_MUL_2]]
; CHECK-NEXT: [[TMP107:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP106]]
; CHECK-NEXT: [[TMP108:%.*]] = getelementptr double, ptr [[TMP107]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP108]], align 8
; CHECK-NEXT: [[TMP109:%.*]] = add i64 [[TMP102]], [[OFF_MUL_3]]
; CHECK-NEXT: [[TMP110:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP109]]
; CHECK-NEXT: [[TMP111:%.*]] = getelementptr double, ptr [[TMP110]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP111]], align 8
; CHECK-NEXT: [[TMP112:%.*]] = add i64 [[TMP102]], [[OFF_MUL_4]]
; CHECK-NEXT: [[TMP113:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP112]]
; CHECK-NEXT: [[TMP114:%.*]] = getelementptr double, ptr [[TMP113]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP114]], align 8
; CHECK-NEXT: [[TMP115:%.*]] = add i64 [[TMP102]], [[OFF_MUL_5]]
; CHECK-NEXT: [[TMP116:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP115]]
; CHECK-NEXT: [[TMP117:%.*]] = getelementptr double, ptr [[TMP116]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP117]], align 8
; CHECK-NEXT: [[TMP118:%.*]] = add i64 [[TMP102]], [[OFF_MUL_6]]
; CHECK-NEXT: [[TMP119:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP118]]
; CHECK-NEXT: [[TMP120:%.*]] = getelementptr double, ptr [[TMP119]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP120]], align 8
; CHECK-NEXT: [[TMP121:%.*]] = add i64 [[TMP102]], [[OFF_MUL_7]]
; CHECK-NEXT: [[TMP122:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP121]]
; CHECK-NEXT: [[TMP123:%.*]] = getelementptr double, ptr [[TMP122]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP123]], align 8
; CHECK-NEXT: [[TMP124:%.*]] = add i64 [[TMP102]], [[OFF_MUL_8]]
; CHECK-NEXT: [[TMP125:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP124]]
; CHECK-NEXT: [[TMP126:%.*]] = getelementptr double, ptr [[TMP125]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP126]], align 8
; CHECK-NEXT: [[TMP127:%.*]] = add i64 [[TMP102]], [[OFF_MUL_9]]
; CHECK-NEXT: [[TMP128:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP127]]
; CHECK-NEXT: [[TMP129:%.*]] = getelementptr double, ptr [[TMP128]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP129]], align 8
; CHECK-NEXT: [[TMP130:%.*]] = add i64 [[TMP102]], [[OFF_MUL_10]]
; CHECK-NEXT: [[TMP131:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP130]]
; CHECK-NEXT: [[TMP132:%.*]] = getelementptr double, ptr [[TMP131]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP132]], align 8
; CHECK-NEXT: [[TMP133:%.*]] = add i64 [[TMP102]], [[OFF_MUL_11]]
; CHECK-NEXT: [[TMP134:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP133]]
; CHECK-NEXT: [[TMP135:%.*]] = getelementptr double, ptr [[TMP134]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP135]], align 8
; CHECK-NEXT: [[TMP136:%.*]] = add i64 [[TMP102]], [[OFF_MUL_12]]
; CHECK-NEXT: [[TMP137:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP136]]
; CHECK-NEXT: [[TMP138:%.*]] = getelementptr double, ptr [[TMP137]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP138]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP11:%.*]] = add nsw i64 [[TMP10]], -5
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[OFF]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[DST:%.*]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP13]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP14]], align 8
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP11]], [[OFF_MUL_2]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP17]], align 8
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP11]], [[OFF_MUL_3]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP19]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP20]], align 8
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP11]], [[OFF_MUL_4]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[TMP22]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP23]], align 8
; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[TMP11]], [[OFF_MUL_5]]
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP24]]
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP25]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP26]], align 8
; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[TMP11]], [[OFF_MUL_6]]
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP27]]
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP29]], align 8
; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[TMP11]], [[OFF_MUL_7]]
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP30]]
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP31]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP32]], align 8
; CHECK-NEXT: [[TMP33:%.*]] = add i64 [[TMP11]], [[OFF_MUL_8]]
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP33]]
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP34]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP35]], align 8
; CHECK-NEXT: [[TMP36:%.*]] = add i64 [[TMP11]], [[OFF_MUL_9]]
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP37]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP38]], align 8
; CHECK-NEXT: [[TMP39:%.*]] = add i64 [[TMP11]], [[OFF_MUL_10]]
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP39]]
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr double, ptr [[TMP40]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP41]], align 8
; CHECK-NEXT: [[TMP42:%.*]] = add i64 [[TMP11]], [[OFF_MUL_11]]
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP42]]
; CHECK-NEXT: [[TMP44:%.*]] = getelementptr double, ptr [[TMP43]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP44]], align 8
; CHECK-NEXT: [[TMP45:%.*]] = add i64 [[TMP11]], [[OFF_MUL_12]]
; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP45]]
; CHECK-NEXT: [[TMP47:%.*]] = getelementptr double, ptr [[TMP46]], i32 0
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP47]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP139:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP139]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]

View File

@@ -114,8 +114,8 @@ define void @steps_match_but_different_access_sizes_1(ptr %a, ptr %b, i64 %n) {
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %scalar.ph, label %vector.memcheck
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[A2]], 2
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[B1]], [[TMP0]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[B1]], -2
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[A2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %scalar.ph, label %vector.ph
;
@@ -148,7 +148,7 @@ define void @steps_match_but_different_access_sizes_2(ptr %a, ptr %b, i64 %n) {
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %scalar.ph, label %vector.memcheck
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[A1]], 2
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[A1]], 2
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[B2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %scalar.ph, label %vector.ph
@@ -282,19 +282,15 @@ define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr noca
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC2:%.*]] = ptrtoint ptr [[SRC:%.*]] to i64
; CHECK-NEXT: [[DST1:%.*]] = ptrtoint ptr [[DST:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[N:%.*]], 2
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DST1]], [[SRC2]]
; CHECK-NEXT: br label [[OUTER_LOOP:%.*]]
; CHECK: outer.loop:
; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[INNER_EXIT:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], [[OUTER_IV]]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[DST1]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[SRC2]], [[TMP1]]
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[OUTER_IV]], [[N]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 16
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[SUB]], 16
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
;
entry:

View File

@@ -413,18 +413,18 @@ define void @diff_checks_src_start_invariant(ptr nocapture noundef writeonly %ds
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: [[WIDE_M:%.*]] = zext i32 [[M]] to i64
; CHECK-NEXT: [[WIDE_N:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[WIDE_N]], 2
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[DST1]], [[SRC2]]
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[WIDE_N]], 2
; CHECK-NEXT: br label [[OUTER_LOOP:%.*]]
; CHECK: outer.loop:
; CHECK-NEXT: [[IV_OUTER:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_OUTER_NEXT:%.*]], [[INNER_LOOP_EXIT:%.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], [[IV_OUTER]]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[DST1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = mul nsw i64 [[IV_OUTER]], [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], [[IV_OUTER]]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i64 [[IV_OUTER]], [[TMP0]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[SRC2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], 16
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 16
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_N]], 4
@@ -436,7 +436,7 @@ define void @diff_checks_src_start_invariant(ptr nocapture noundef writeonly %ds
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP8]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP6]], [[TMP4]]
; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD]], ptr [[TMP11]], align 4
@@ -453,7 +453,7 @@ define void @diff_checks_src_start_invariant(ptr nocapture noundef writeonly %ds
; CHECK-NEXT: [[IV_INNER:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_INNER_NEXT:%.*]], [[INNER_LOOP]] ]
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV_INNER]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[IV_INNER]], [[TMP4]]
; CHECK-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[IV_INNER]], [[TMP5]]
; CHECK-NEXT: [[ARRAYIDX6_US:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP14]]
; CHECK-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX6_US]], align 4
; CHECK-NEXT: [[IV_INNER_NEXT]] = add nuw nsw i64 [[IV_INNER]], 1