This fixes a violation of the wrap flag rules introduced in c4048d8f. This is an alternate fix to D106852.
The basic problem being fixed is that we infer a set of flags which is valid at some inner scope S1 (usually by correctly propagating them from IR), and then (incorrectly) extend them to a SCEV in scope S2 where S1 != S2. This is not in general safe per the wrap flags semantics recently defined.
In this patch, I include a simple inference step to handle the case where we can prove that S2 is the preheader of the loop S1, and that entry into S2 implies execution of S1. See the code for a more detailed explanation.
One worry I have with this patch is that I might be over-fitting what shows up in tests - and thus hiding negative impact we'd see in the real world. My best defense is that the rule used here very closely follows the one used to propagate the flags from IR to the inner add to start with, and thus if one is reasonable, so probably is the other. Curious what others think about that piece.
The test diffs are roughly as expected. Mostly analysis only, with two transform changes. Oddly, the result looks better in the loop-idiom test, and I don't understand the PPC output well enough to tell. Nothing terrible looking though. (For context, without the scope inference peephole, the test delta includes a couple of vectorization tests. Again, not super concerning, but slightly more so.)
Differential Revision: https://reviews.llvm.org/D109845
1387 lines
60 KiB
LLVM
1387 lines
60 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -basic-aa -loop-idiom < %s -S | FileCheck %s
|
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
|
|
|
; For @test11_pattern
|
|
; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 1, i32 1, i32 1, i32 1]
|
|
|
|
; For @test13_pattern
|
|
; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x i32*] [i32* @G, i32* @G]
|
|
|
|
target triple = "x86_64-apple-darwin10.0.0"
|
|
|
|
; Basic case: a loop storing i8 0 to %Base[0 .. %Size). The CHECK lines expect
; a single memset of %Size bytes in bb.nph and no store left in the loop body.
define void @test1(i8* %Base, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test1(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[BASE:%.*]], i8 0, i64 [[SIZE:%.*]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph: ; entry block (no predecessors)
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
|
store i8 0, i8* %I.0.014, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Make sure memset is formed for stores larger than 1 byte, and that the
|
|
; alignment of the store is preserved. The CHECK lines expect the length to be
; the trip count scaled to bytes (%Size << 1) and the memset to carry align 2.
|
|
define void @test1_i16(i16* align 2 %Base, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test1_i16(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[BASE1:%.*]] = bitcast i16* [[BASE:%.*]] to i8*
|
|
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 1
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 2 [[BASE1]], i8 0, i64 [[TMP0]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i16, i16* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph: ; entry block (no predecessors)
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%I.0.014 = getelementptr i16, i16* %Base, i64 %indvar
|
|
store i16 0, i16* %I.0.014, align 2
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; This is a loop that was rotated but where the blocks weren't merged: the
|
|
; store is in the header (for.body) and the exit test is in a separate latch
; (for.body.cont). This shouldn't perturb us; a memset is still expected.
|
|
define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test1a(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[BASE:%.*]], i8 0, i64 [[SIZE:%.*]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY_CONT:%.*]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: br label [[FOR_BODY_CONT]]
|
|
; CHECK: for.body.cont:
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph: ; entry block (no predecessors)
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body.cont
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
|
|
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
|
store i8 0, i8* %I.0.014, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
br label %for.body.cont
|
|
for.body.cont: ; preds = %for.body
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body.cont
|
|
ret void
|
|
}
|
|
|
|
|
|
; Stores the i32 constant 16843009 (0x01010101, i.e. every byte is 1) in a loop
; guarded by a %Size == 0 check. The CHECK lines expect a memset of byte value 1
; with length %Size << 2, placed in a newly created preheader behind the guard.
define void @test2(i32* %Base, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test2(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[BASE1:%.*]] = bitcast i32* [[BASE:%.*]] to i8*
|
|
; CHECK-NEXT: [[CMP10:%.*]] = icmp eq i64 [[SIZE:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
|
|
; CHECK: for.body.preheader:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[SIZE]], 2
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[BASE1]], i8 1, i64 [[TMP0]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
|
|
; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[I_011]]
|
|
; CHECK-NEXT: [[INC]] = add nsw i64 [[I_011]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end.loopexit:
|
|
; CHECK-NEXT: br label [[FOR_END]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%cmp10 = icmp eq i64 %Size, 0
|
|
br i1 %cmp10, label %for.end, label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
|
|
%add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011
|
|
store i32 16843009, i32* %add.ptr.i, align 4
|
|
%inc = add nsw i64 %i.011, 1
|
|
%exitcond = icmp eq i64 %inc, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body, %entry
|
|
ret void
|
|
}
|
|
|
|
; This is a case where there is an extra may-aliased store in the loop (the
|
|
; store through %MayAlias), so we can't promote the i32 store to a memset.
; The CHECK lines expect both stores to remain in the loop unchanged.
|
|
define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp {
|
|
; CHECK-LABEL: @test3(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[I_011]]
|
|
; CHECK-NEXT: store i32 16843009, i32* [[ADD_PTR_I]], align 4
|
|
; CHECK-NEXT: store i8 42, i8* [[MAYALIAS:%.*]], align 1
|
|
; CHECK-NEXT: [[INC]] = add nsw i64 [[I_011]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[SIZE:%.*]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
|
|
%add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011
|
|
store i32 16843009, i32* %add.ptr.i, align 4
|
|
|
|
store i8 42, i8* %MayAlias
|
|
%inc = add nsw i64 %i.011, 1
|
|
%exitcond = icmp eq i64 %inc, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Make sure the first store in the loop is turned into a memset even though a
; second, loop-invariant store is present. Note that despite its name,
; %Base100 is %Base + 1000, which lies outside the 100 bytes the memset covers.
|
|
define void @test4(i8* %Base) nounwind ssp {
|
|
; CHECK-LABEL: @test4(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[BASE100:%.*]] = getelementptr i8, i8* [[BASE:%.*]], i64 1000
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[BASE]], i8 0, i64 100, i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: store i8 42, i8* [[BASE100]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 100
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph: ; entry block (no predecessors)
|
|
%Base100 = getelementptr i8, i8* %Base, i64 1000
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
|
store i8 0, i8* %I.0.014, align 1
|
|
|
|
;; Store beyond the range covered by the memset; it should still be safe to promote.
|
|
store i8 42, i8* %Base100
|
|
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, 100
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; This can't be promoted to a memset: the stored value (%V, the truncated
; induction variable) is loop variant. The CHECK lines expect the loop unchanged.
|
|
define void @test5(i8* %Base, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test5(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, i8* [[BASE:%.*]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[V:%.*]] = trunc i64 [[INDVAR]] to i8
|
|
; CHECK-NEXT: store i8 [[V]], i8* [[I_0_014]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph: ; entry block (no predecessors)
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
|
|
|
%V = trunc i64 %indvar to i8
|
|
store i8 %V, i8* %I.0.014, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
|
|
;; memcpy formation: a byte-wise load/store loop between two distinct allocas.
;; The CHECK lines expect a memcpy of %Size bytes in bb.nph; the load stays in
;; the loop but the store is gone.
|
|
define void @test6(i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test6(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[BASE:%.*]] = alloca i8, i32 10000, align 1
|
|
; CHECK-NEXT: [[DEST:%.*]] = alloca i8, i32 10000, align 1
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* align 1 [[BASE]], i64 [[SIZE:%.*]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[DEST]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[I_0_014]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
%Base = alloca i8, i32 10000
|
|
%Dest = alloca i8, i32 10000
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
|
%DestI = getelementptr i8, i8* %Dest, i64 %indvar
|
|
%V = load i8, i8* %I.0.014, align 1
|
|
store i8 %V, i8* %DestI, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
;; memcpy formation, check alignment: the store (destination) is align 4 and
;; the load (source) is align 1; the CHECK lines expect the memcpy to carry
;; align 4 on its destination and align 1 on its source.
|
|
define void @test6_dest_align(i32* noalias align 1 %Base, i32* noalias align 4 %Dest, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test6_dest_align(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[DEST1:%.*]] = bitcast i32* [[DEST:%.*]] to i8*
|
|
; CHECK-NEXT: [[BASE2:%.*]] = bitcast i32* [[BASE:%.*]] to i8*
|
|
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 2
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[DEST1]], i8* align 1 [[BASE2]], i64 [[TMP0]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i32, i32* [[DEST]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[I_0_014]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
|
|
%DestI = getelementptr i32, i32* %Dest, i64 %indvar
|
|
%V = load i32, i32* %I.0.014, align 1
|
|
store i32 %V, i32* %DestI, align 4
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
;; memcpy formation, check alignment: the load (source) is align 4 and the
;; store (destination) is align 1; the CHECK lines expect the memcpy to carry
;; align 1 on its destination and align 4 on its source.
|
|
define void @test6_src_align(i32* noalias align 4 %Base, i32* noalias align 1 %Dest, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test6_src_align(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[DEST1:%.*]] = bitcast i32* [[DEST:%.*]] to i8*
|
|
; CHECK-NEXT: [[BASE2:%.*]] = bitcast i32* [[BASE:%.*]] to i8*
|
|
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 2
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST1]], i8* align 4 [[BASE2]], i64 [[TMP0]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i32, i32* [[DEST]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[I_0_014]], align 4
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
|
|
%DestI = getelementptr i32, i32* %Dest, i64 %indvar
|
|
%V = load i32, i32* %I.0.014, align 4
|
|
store i32 %V, i32* %DestI, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
|
|
; This is a loop that was rotated but where the blocks weren't merged: here the
|
|
; header (for.body) only holds the phi and the store lives in the second block
; (for.body.cont). This shouldn't perturb us; a memset is still expected.
|
|
define void @test7(i8* %Base, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test7(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[BASE:%.*]], i8 0, i64 [[SIZE:%.*]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY_CONT:%.*]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY_CONT]]
|
|
; CHECK: for.body.cont:
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph: ; entry block (no predecessors)
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body.cont
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
|
|
br label %for.body.cont
|
|
for.body.cont: ; preds = %for.body
|
|
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
|
store i8 0, i8* %I.0.014, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body.cont
|
|
ret void
|
|
}
|
|
|
|
; This loop should not be transformed: it executes only one iteration (the exit
; condition compares %indvar.next against the constant 1). %Size is unused.
|
|
define void @test8(i64* %Ptr, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test8(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[PI:%.*]] = getelementptr i64, i64* [[PTR:%.*]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: store i64 0, i64* [[PI]], align 8
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 1
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph: ; entry block (no predecessors)
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%PI = getelementptr i64, i64* %Ptr, i64 %indvar
|
|
store i64 0, i64 *%PI
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, 1
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Opaque external function used by @test9: it receives %Base and returns a
; pointer the optimizer must assume may alias it.
declare i8* @external(i8*)
|
|
|
|
;; This cannot be transformed into a memcpy, because the read-from location is
|
|
;; mutated by the loop (through the store to %BaseAlias). The CHECK lines
;; expect the load and both stores to remain in the loop.
|
|
define void @test9(i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test9(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[BASE:%.*]] = alloca i8, i32 10000, align 1
|
|
; CHECK-NEXT: [[DEST:%.*]] = alloca i8, i32 10000, align 1
|
|
; CHECK-NEXT: [[BASEALIAS:%.*]] = call i8* @external(i8* [[BASE]])
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[DEST]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[I_0_014]], align 1
|
|
; CHECK-NEXT: store i8 [[V]], i8* [[DESTI]], align 1
|
|
; CHECK-NEXT: store i8 4, i8* [[BASEALIAS]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
%Base = alloca i8, i32 10000
|
|
%Dest = alloca i8, i32 10000
|
|
|
|
%BaseAlias = call i8* @external(i8* %Base)
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
|
%DestI = getelementptr i8, i8* %Dest, i64 %indvar
|
|
%V = load i8, i8* %I.0.014, align 1
|
|
store i8 %V, i8* %DestI, align 1
|
|
|
|
;; This store can clobber the input (%BaseAlias may point into %Base).
|
|
store i8 4, i8* %BaseAlias
|
|
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Two dimensional nested loop should be promoted to one big memset. Both loops
; run 100 iterations and the store address is %X + i*100 + j, so the CHECK
; lines expect a single 10000-byte memset in the entry block.
|
|
define void @test10(i8* %X) nounwind ssp {
|
|
; CHECK-LABEL: @test10(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[X:%.*]], i8 0, i64 10000, i1 false)
|
|
; CHECK-NEXT: br label [[BB_NPH:%.*]]
|
|
; CHECK: bb.nph:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_INC10:%.*]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC12:%.*]], [[FOR_INC10]] ]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = mul nuw nsw i64 [[INDVAR]], 100
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[X]], i64 [[TMP0]]
|
|
; CHECK-NEXT: br label [[FOR_BODY5:%.*]]
|
|
; CHECK: for.body5:
|
|
; CHECK-NEXT: [[J_02:%.*]] = phi i32 [ 0, [[BB_NPH]] ], [ [[INC:%.*]], [[FOR_BODY5]] ]
|
|
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_04]], 100
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[J_02]], [[MUL]]
|
|
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i64 [[IDXPROM]]
|
|
; CHECK-NEXT: [[INC]] = add nsw i32 [[J_02]], 1
|
|
; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i32 [[INC]], 100
|
|
; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_INC10]], label [[FOR_BODY5]]
|
|
; CHECK: for.inc10:
|
|
; CHECK-NEXT: [[INC12]] = add nsw i32 [[I_04]], 1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC12]], 100
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END13:%.*]], label [[BB_NPH]]
|
|
; CHECK: for.end13:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %bb.nph
|
|
|
|
bb.nph: ; preds = %entry, %for.inc10
|
|
%i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
|
|
br label %for.body5
|
|
|
|
for.body5: ; preds = %for.body5, %bb.nph
|
|
%j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]
|
|
%mul = mul nsw i32 %i.04, 100
|
|
%add = add nsw i32 %j.02, %mul
|
|
%idxprom = sext i32 %add to i64
|
|
%arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom
|
|
store i8 0, i8* %arrayidx, align 1
|
|
%inc = add nsw i32 %j.02, 1
|
|
%cmp4 = icmp eq i32 %inc, 100
|
|
br i1 %cmp4, label %for.inc10, label %for.body5
|
|
|
|
for.inc10: ; preds = %for.body5
|
|
%inc12 = add nsw i32 %i.04, 1
|
|
%cmp = icmp eq i32 %inc12, 100
|
|
br i1 %cmp, label %for.end13, label %bb.nph
|
|
|
|
for.end13: ; preds = %for.inc10
|
|
ret void
|
|
}
|
|
|
|
; On darwin10 (which is the triple in this .ll file) this loop can be turned
|
|
; into a memset_pattern call. The stored i32 1 is not a repeated byte, so the
; CHECK lines expect memset_pattern16 with the [4 x i32] constant
; @.memset_pattern and a length of 40000 bytes (10000 i32 stores).
|
|
; rdar://9009151
|
|
define void @test11_pattern(i32* nocapture %P) nounwind ssp {
|
|
; CHECK-LABEL: @test11_pattern(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[P1:%.*]] = bitcast i32* [[P:%.*]] to i8*
|
|
; CHECK-NEXT: call void @memset_pattern16(i8* [[P1]], i8* bitcast ([4 x i32]* @.memset_pattern to i8*), i64 40000)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 10000
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
|
|
%arrayidx = getelementptr i32, i32* %P, i64 %indvar
|
|
store i32 1, i32* %arrayidx, align 4
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, 10000
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Store of null should turn into memset of zero. With 10000 pointer stores the
; CHECK lines expect an 80000-byte memset of i8 0 that keeps the store's align 4.
|
|
define void @test12(i32** nocapture %P) nounwind ssp {
|
|
; CHECK-LABEL: @test12(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[P1:%.*]] = bitcast i32** [[P:%.*]] to i8*
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P1]], i8 0, i64 80000, i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32*, i32** [[P]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 10000
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
|
|
%arrayidx = getelementptr i32*, i32** %P, i64 %indvar
|
|
store i32* null, i32** %arrayidx, align 4
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, 10000
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Global whose address is the value stored by the loop in @test13_pattern.
@G = global i32 5
|
|
|
|
; This store-of-address loop can be turned into a memset_pattern call. The
; CHECK lines expect memset_pattern16 with the [2 x i32*] constant
; @.memset_pattern.1 (two copies of @G) and a length of 80000 bytes.
|
|
; rdar://9009151
|
|
define void @test13_pattern(i32** nocapture %P) nounwind ssp {
|
|
; CHECK-LABEL: @test13_pattern(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[P1:%.*]] = bitcast i32** [[P:%.*]] to i8*
|
|
; CHECK-NEXT: call void @memset_pattern16(i8* [[P1]], i8* bitcast ([2 x i32*]* @.memset_pattern.1 to i8*), i64 80000)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32*, i32** [[P]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 10000
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
|
|
%arrayidx = getelementptr i32*, i32** %P, i64 %indvar
|
|
store i32* @G, i32** %arrayidx, align 4
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, 10000
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
|
|
|
|
; PR9815 - This is a partial overlap case that cannot be safely transformed
; into a memcpy.
; Each iteration loads @g_50[%t5 + 4] and stores it to @g_50[%t5 + 5], so the
; second iteration reads the element the first one wrote. The checks expect the
; loop body to be left as written.
@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16

define i32 @test14() nounwind {
; CHECK-LABEL: @test14(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[T5:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[T5]], 4
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 [[IDXPROM]]
; CHECK-NEXT: [[T2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[T5]], 5
; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[ADD4]] to i64
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 [[IDXPROM5]]
; CHECK-NEXT: store i32 [[T2]], i32* [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[INC]] = add nsw i32 [[T5]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 2
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
; CHECK-NEXT: [[T8:%.*]] = load i32, i32* getelementptr inbounds ([7 x i32], [7 x i32]* @g_50, i32 0, i64 6), align 4
; CHECK-NEXT: ret i32 [[T8]]
;
entry:
  br label %for.body

for.body: ; preds = %for.body, %entry
  %t5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %add = add nsw i32 %t5, 4
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom
  %t2 = load i32, i32* %arrayidx, align 4
  %add4 = add nsw i32 %t5, 5
  %idxprom5 = sext i32 %add4 to i64
  %arrayidx6 = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom5
  store i32 %t2, i32* %arrayidx6, align 4
  %inc = add nsw i32 %t5, 1
  %cmp = icmp slt i32 %inc, 2
  br i1 %cmp, label %for.body, label %for.end

for.end: ; preds = %for.body
  %t8 = load i32, i32* getelementptr inbounds ([7 x i32], [7 x i32]* @g_50, i32 0, i64 6), align 4
  ret i32 %t8
}
|
|
|
|
; The loop copies the i32 after %phi.ptr into %phi.ptr while walking a pointer
; induction variable forward, so source and destination ranges overlap. The
; checks expect an llvm.memmove (not an llvm.memcpy) in the entry block.
define void @PR14241(i32* %s, i64 %size) {
; Ensure that we don't form a memcpy for strided loops. Briefly, when we taught
; LoopIdiom about memmove and strided loops, this got miscompiled into a memcpy
; instead of a memmove. If we get the memmove transform back, this will catch
; regressions.
;
; CHECK-LABEL: @PR14241(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[S1:%.*]] = bitcast i32* [[S:%.*]] to i8*
; CHECK-NEXT: [[END_IDX:%.*]] = add i64 [[SIZE:%.*]], -1
; CHECK-NEXT: [[END_PTR:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 [[END_IDX]]
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[S]], i64 1
; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[SIZE]], 2
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -8
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 2
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], 4
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 [[S1]], i8* align 4 [[SCEVGEP2]], i64 [[TMP4]], i1 false)
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[PHI_PTR:%.*]] = phi i32* [ [[S]], [[ENTRY:%.*]] ], [ [[NEXT_PTR:%.*]], [[WHILE_BODY]] ]
; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr inbounds i32, i32* [[PHI_PTR]], i64 1
; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[SRC_PTR]], align 4
; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr inbounds i32, i32* [[PHI_PTR]], i64 0
; CHECK-NEXT: [[NEXT_PTR]] = getelementptr inbounds i32, i32* [[PHI_PTR]], i64 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[NEXT_PTR]], [[END_PTR]]
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[WHILE_BODY]]
; CHECK: exit:
; CHECK-NEXT: ret void
;

entry:
  %end.idx = add i64 %size, -1
  %end.ptr = getelementptr inbounds i32, i32* %s, i64 %end.idx
  br label %while.body

while.body: ; preds = %while.body, %entry
  %phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
  %src.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
  %val = load i32, i32* %src.ptr, align 4
  %dst.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 0
  store i32 %val, i32* %dst.ptr, align 4
  %next.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
  %cmp = icmp eq i32* %next.ptr, %end.ptr
  br i1 %cmp, label %exit, label %while.body

exit: ; preds = %while.body
  ret void
}
|
|
|
|
; Recognize loops with a negative stride.
; The induction variable counts down from 65536 to 0 and zeroes %f[iv] on each
; iteration, i.e. 65537 i32 stores. The checks expect one 262148-byte memset at
; %f in the entry block and no store left in the loop.
define void @test15(i32* nocapture %f) {
; CHECK-LABEL: @test15(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[F1:%.*]] = bitcast i32* [[F:%.*]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[F1]], i8 0, i64 262148, i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 65536, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body

for.body: ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
  store i32 0, i32* %arrayidx, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.body
  ret void
}
|
|
|
|
; Loop with a negative stride. Verify an aliasing write to f[65536] prevents
; the creation of a memset.
; Every iteration stores 0 to %f[iv] and then 1 to %f[65536], which is also the
; first element the zeroing store touches. The checks expect both stores to
; remain in the loop and nothing to be added to the entry block.
define void @test16(i32* nocapture %f) {
; CHECK-LABEL: @test16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[F:%.*]], i64 65536
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 65536, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
entry:
  %arrayidx1 = getelementptr inbounds i32, i32* %f, i64 65536
  br label %for.body

for.body: ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
  store i32 0, i32* %arrayidx, align 4
  store i32 1, i32* %arrayidx1, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.body
  ret void
}
|
|
|
|
; Handle memcpy-able loops with negative stride.
; Elements are copied from %a into a freshly malloc'd buffer using an i32
; counter that is decremented from %c before each access and stops at 0. The
; loop is guarded by a %c == 0 test in the entry block; the checks expect the
; memcpy and its address/length computation to be emitted in the preheader.
define noalias i32* @test17(i32* nocapture readonly %a, i32 %c) {
; CHECK-LABEL: @test17(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[C:%.*]] to i64
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[CONV]], 2
; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @malloc(i64 [[MUL]])
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32*
; CHECK-NEXT: [[TOBOOL_9:%.*]] = icmp eq i32 [[C]], 0
; CHECK-NEXT: br i1 [[TOBOOL_9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
; CHECK: while.body.preheader:
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C]], -1
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = shl nsw i64 [[TMP2]], 2
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], [[TMP5]]
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[CALL]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP7]]
; CHECK-NEXT: [[SCEVGEP12:%.*]] = bitcast i32* [[SCEVGEP1]] to i8*
; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[C]] to i64
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[SCEVGEP]], i8* align 4 [[SCEVGEP12]], i64 [[TMP9]], i1 false)
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[DEC10_IN:%.*]] = phi i32 [ [[DEC10:%.*]], [[WHILE_BODY]] ], [ [[C]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[DEC10]] = add nsw i32 [[DEC10_IN]], -1
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[DEC10]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[DEC10]], 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
; CHECK: while.end.loopexit:
; CHECK-NEXT: br label [[WHILE_END]]
; CHECK: while.end:
; CHECK-NEXT: ret i32* [[TMP0]]
;
entry:
  %conv = sext i32 %c to i64
  %mul = shl nsw i64 %conv, 2
  %call = tail call noalias i8* @malloc(i64 %mul)
  %0 = bitcast i8* %call to i32*
  %tobool.9 = icmp eq i32 %c, 0
  br i1 %tobool.9, label %while.end, label %while.body.preheader

while.body.preheader: ; preds = %entry
  br label %while.body

while.body: ; preds = %while.body.preheader, %while.body
  %dec10.in = phi i32 [ %dec10, %while.body ], [ %c, %while.body.preheader ]
  %dec10 = add nsw i32 %dec10.in, -1
  %idxprom = sext i32 %dec10 to i64
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
  %1 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %0, i64 %idxprom
  store i32 %1, i32* %arrayidx2, align 4
  %tobool = icmp eq i32 %dec10, 0
  br i1 %tobool, label %while.end.loopexit, label %while.body

while.end.loopexit: ; preds = %while.body
  br label %while.end

while.end: ; preds = %while.end.loopexit, %entry
  ret i32* %0
}
|
|
|
|
; Allocator called by @test17 to obtain the destination buffer of its copy loop.
declare noalias i8* @malloc(i64)
|
|
|
|
; Handle memcpy-able loops with negative stride.
; void test18(unsigned *__restrict__ a, unsigned *__restrict__ b) {
; for (int i = 2047; i >= 0; --i) {
; a[i] = b[i];
; }
; }
; Both pointers are noalias and the loop runs 2048 times, so the checks expect
; one 8192-byte memcpy from %b to %a in the entry block.
define void @test18(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) #0 {
; CHECK-LABEL: @test18(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A1:%.*]] = bitcast i32* [[A:%.*]] to i8*
; CHECK-NEXT: [[B2:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[A1]], i8* align 4 [[B2]], i64 8192, i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 2047, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body

for.body: ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 2047, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  store i32 %0, i32* %arrayidx2, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.body
  ret void
}
|
|
|
|
; Two dimensional nested loop with negative stride should be promoted to one big memset.
; Both loops count down from 99 to 0 and the byte index is i * 100 + j, so the
; 100 x 100 iterations cover bytes 0..9999 of %X. The checks expect a single
; 10000-byte memset in the entry block and no store left in the inner loop.
define void @test19(i8* nocapture %X) {
; CHECK-LABEL: @test19(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[X:%.*]], i8 0, i64 10000, i1 false)
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
; CHECK: for.cond1.preheader:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_INC4:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ 99, [[ENTRY]] ], [ [[DEC5:%.*]], [[FOR_INC4]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul nsw i64 [[INDVAR]], -100
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 9900
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[X]], i64 [[TMP1]]
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_06]], 100
; CHECK-NEXT: br label [[FOR_BODY3:%.*]]
; CHECK: for.body3:
; CHECK-NEXT: [[J_05:%.*]] = phi i32 [ 99, [[FOR_COND1_PREHEADER]] ], [ [[DEC:%.*]], [[FOR_BODY3]] ]
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[J_05]], [[MUL]]
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i64 [[IDXPROM]]
; CHECK-NEXT: [[DEC]] = add nsw i32 [[J_05]], -1
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[J_05]], 0
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_INC4]]
; CHECK: for.inc4:
; CHECK-NEXT: [[DEC5]] = add nsw i32 [[I_06]], -1
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[I_06]], 0
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND1_PREHEADER]], label [[FOR_END6:%.*]]
; CHECK: for.end6:
; CHECK-NEXT: ret void
;
entry:
  br label %for.cond1.preheader

for.cond1.preheader: ; preds = %entry, %for.inc4
  %i.06 = phi i32 [ 99, %entry ], [ %dec5, %for.inc4 ]
  %mul = mul nsw i32 %i.06, 100
  br label %for.body3

for.body3: ; preds = %for.cond1.preheader, %for.body3
  %j.05 = phi i32 [ 99, %for.cond1.preheader ], [ %dec, %for.body3 ]
  %add = add nsw i32 %j.05, %mul
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom
  store i8 0, i8* %arrayidx, align 1
  %dec = add nsw i32 %j.05, -1
  %cmp2 = icmp sgt i32 %j.05, 0
  br i1 %cmp2, label %for.body3, label %for.inc4

for.inc4: ; preds = %for.body3
  %dec5 = add nsw i32 %i.06, -1
  %cmp = icmp sgt i32 %i.06, 0
  br i1 %cmp, label %for.cond1.preheader, label %for.end6

for.end6: ; preds = %for.inc4
  ret void
}
|
|
|
|
; Handle loops where the trip count is a narrow integer that needs to be
; extended.
; %size is an i32 and the loop, guarded by %size > 0, zeroes one i64 per
; iteration. The checks expect the memset length to be computed in loop.ph as
; zext(%size) shifted left by 3.
define void @form_memset_narrow_size(i64* %ptr, i32 %size) {
; CHECK-LABEL: @form_memset_narrow_size(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR1:%.*]] = bitcast i64* [[PTR:%.*]] to i8*
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP_PH:%.*]], label [[EXIT:%.*]]
; CHECK: loop.ph:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SIZE]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[PTR1]], i8 0, i64 [[TMP1]], i1 false)
; CHECK-NEXT: br label [[LOOP_BODY:%.*]]
; CHECK: loop.body:
; CHECK-NEXT: [[STOREMERGE4:%.*]] = phi i32 [ 0, [[LOOP_PH]] ], [ [[INC:%.*]], [[LOOP_BODY]] ]
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[STOREMERGE4]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 [[IDXPROM]]
; CHECK-NEXT: [[INC]] = add nsw i32 [[STOREMERGE4]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[INC]], [[SIZE]]
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_BODY]], label [[LOOP_EXIT:%.*]]
; CHECK: loop.exit:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  %cmp1 = icmp sgt i32 %size, 0
  br i1 %cmp1, label %loop.ph, label %exit

loop.ph: ; preds = %entry
  br label %loop.body

loop.body: ; preds = %loop.body, %loop.ph
  %storemerge4 = phi i32 [ 0, %loop.ph ], [ %inc, %loop.body ]
  %idxprom = sext i32 %storemerge4 to i64
  %arrayidx = getelementptr inbounds i64, i64* %ptr, i64 %idxprom
  store i64 0, i64* %arrayidx, align 8
  %inc = add nsw i32 %storemerge4, 1
  %cmp2 = icmp slt i32 %inc, %size
  br i1 %cmp2, label %loop.body, label %loop.exit

loop.exit: ; preds = %loop.body
  br label %exit

exit: ; preds = %loop.exit, %entry
  ret void
}
|
|
|
|
; Copy loop whose trip count is a narrow (i32) value: guarded by %size > 0, it
; copies one i64 per iteration from %src to %dst, both noalias. The checks
; expect a memcpy in loop.ph whose length is zext(%size) shifted left by 3.
define void @form_memcpy_narrow_size(i64* noalias %dst, i64* noalias %src, i32 %size) {
; CHECK-LABEL: @form_memcpy_narrow_size(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DST1:%.*]] = bitcast i64* [[DST:%.*]] to i8*
; CHECK-NEXT: [[SRC2:%.*]] = bitcast i64* [[SRC:%.*]] to i8*
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP_PH:%.*]], label [[EXIT:%.*]]
; CHECK: loop.ph:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SIZE]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST1]], i8* align 8 [[SRC2]], i64 [[TMP1]], i1 false)
; CHECK-NEXT: br label [[LOOP_BODY:%.*]]
; CHECK: loop.body:
; CHECK-NEXT: [[STOREMERGE4:%.*]] = phi i32 [ 0, [[LOOP_PH]] ], [ [[INC:%.*]], [[LOOP_BODY]] ]
; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[STOREMERGE4]] to i64
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[SRC]], i64 [[IDXPROM1]]
; CHECK-NEXT: [[V:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8
; CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[STOREMERGE4]] to i64
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[DST]], i64 [[IDXPROM2]]
; CHECK-NEXT: [[INC]] = add nsw i32 [[STOREMERGE4]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[INC]], [[SIZE]]
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_BODY]], label [[LOOP_EXIT:%.*]]
; CHECK: loop.exit:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  %cmp1 = icmp sgt i32 %size, 0
  br i1 %cmp1, label %loop.ph, label %exit

loop.ph: ; preds = %entry
  br label %loop.body

loop.body: ; preds = %loop.body, %loop.ph
  %storemerge4 = phi i32 [ 0, %loop.ph ], [ %inc, %loop.body ]
  %idxprom1 = sext i32 %storemerge4 to i64
  %arrayidx1 = getelementptr inbounds i64, i64* %src, i64 %idxprom1
  %v = load i64, i64* %arrayidx1, align 8
  %idxprom2 = sext i32 %storemerge4 to i64
  %arrayidx2 = getelementptr inbounds i64, i64* %dst, i64 %idxprom2
  store i64 %v, i64* %arrayidx2, align 8
  %inc = add nsw i32 %storemerge4, 1
  %cmp2 = icmp slt i32 %inc, %size
  br i1 %cmp2, label %loop.body, label %loop.exit

loop.exit: ; preds = %loop.body
  br label %exit

exit: ; preds = %loop.exit, %entry
  ret void
}
|
|
|
|
;; Memmove formation.
;; Each iteration copies Src[i + 1] to Src[i] with i counting up from 0, so
;; the checks expect a memmove of %Size bytes from Src + 1 to Src in bb.nph.
define void @PR46179_positive_stride(i8* %Src, i64 %Size) {
; CHECK-LABEL: @PR46179_positive_stride(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 1
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 [[SRC]], i8* align 1 [[SCEVGEP]], i64 [[SIZE:%.*]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[STEP]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[SRCI]], align 1
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
  br label %for.body

for.body: ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %Step = add nuw nsw i64 %indvar, 1
  %SrcI = getelementptr i8, i8* %Src, i64 %Step
  %DestI = getelementptr i8, i8* %Src, i64 %indvar
  %V = load i8, i8* %SrcI, align 1
  store i8 %V, i8* %DestI, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body
  ret void
}
|
|
|
|
;; Memmove formation.
;; Each iteration copies Src[i - 1] to Src[i] with i counting down from %Size,
;; guarded by %Size > 0. The checks expect a memmove of %Size bytes from Src to
;; Src + 1, placed in a newly created for.body.preheader block.
define void @PR46179_negative_stride(i8* %Src, i64 %Size) {
; CHECK-LABEL: @PR46179_negative_stride(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 1
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 [[SCEVGEP]], i8* align 1 [[SRC]], i64 [[SIZE]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[STEP:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[STEP]] = add nsw i64 [[INDVAR]], -1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[STEP]]
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[SRCI]], align 1
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
  %cmp1 = icmp sgt i64 %Size, 0
  br i1 %cmp1, label %for.body, label %for.end

for.body: ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ %Step, %for.body ], [ %Size, %bb.nph ]
  %Step = add nsw i64 %indvar, -1
  %SrcI = getelementptr inbounds i8, i8* %Src, i64 %Step
  %V = load i8, i8* %SrcI, align 1
  %DestI = getelementptr inbounds i8, i8* %Src, i64 %indvar
  store i8 %V, i8* %DestI, align 1
  %exitcond = icmp sgt i64 %indvar, 1
  br i1 %exitcond, label %for.body, label %for.end

for.end: ; preds = %for.body, %bb.nph
  ret void
}
|
|
|
|
;; Do not form memmove from previous store when stride is positive.
;; Each iteration reads Src[i - 1], the byte written by the previous iteration,
;; and stores it to Src[i] with i counting up from 1. The checks expect the load
;; and store to remain in the loop and nothing to be added to bb.nph.
;; NOTE(review): %Step is computed with `add nuw ... -1`, which wraps unsigned
;; for every %indvar >= 1 (and %indvar starts at 1). If that flag is unintended,
;; drop nuw and regenerate the assertions.
define void @do_not_form_memmove1(i8* %Src, i64 %Size) {
; CHECK-LABEL: @do_not_form_memmove1(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 1, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], -1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 [[STEP]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[SRCI]], align 1
; CHECK-NEXT: store i8 [[V]], i8* [[DESTI]], align 1
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
  br label %for.body

for.body: ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 1, %bb.nph ], [ %indvar.next, %for.body ]
  %Step = add nuw nsw i64 %indvar, -1
  %SrcI = getelementptr i8, i8* %Src, i64 %Step
  %DestI = getelementptr i8, i8* %Src, i64 %indvar
  %V = load i8, i8* %SrcI, align 1
  store i8 %V, i8* %DestI, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body
  ret void
}
|
|
|
|
;; Do not form memmove from next store when stride is negative.
;; Each iteration reads Src[i + 1], the byte written by the previous iteration,
;; and stores it to Src[i] with i counting down from %Size. The checks expect
;; the load and store to remain in the loop and the new preheader to contain
;; only a branch.
define void @do_not_form_memmove2(i8* %Src, i64 %Size) {
; CHECK-LABEL: @do_not_form_memmove2(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[STEP]]
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[SRCI]], align 1
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: store i8 [[V]], i8* [[DESTI]], align 1
; CHECK-NEXT: [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], -1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
  %cmp1 = icmp sgt i64 %Size, 0
  br i1 %cmp1, label %for.body, label %for.end

for.body: ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ %indvar.next, %for.body ], [ %Size, %bb.nph ]
  %Step = add nuw nsw i64 %indvar, 1
  %SrcI = getelementptr inbounds i8, i8* %Src, i64 %Step
  %V = load i8, i8* %SrcI, align 1
  %DestI = getelementptr inbounds i8, i8* %Src, i64 %indvar
  store i8 %V, i8* %DestI, align 1
  %indvar.next = add nsw i64 %indvar, -1
  %exitcond = icmp sgt i64 %indvar, 1
  br i1 %exitcond, label %for.body, label %for.end

for.end: ; preds = %for.body, %bb.nph
  ret void
}
|
|
|
|
;; Do not form memmove when underaligned load is overlapped with store.
;; The i32 load is taken one i16 (2 bytes) past %phi.ptr with align 2, while the
;; i32 store writes at %phi.ptr itself, so the two accesses share bytes within
;; a single iteration. The checks expect the loop to be left as written.
define void @do_not_form_memmove3(i32* %s, i64 %size) {
; CHECK-LABEL: @do_not_form_memmove3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[END_IDX:%.*]] = add i64 [[SIZE:%.*]], -1
; CHECK-NEXT: [[END_PTR:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 [[END_IDX]]
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[PHI_PTR:%.*]] = phi i32* [ [[S]], [[ENTRY:%.*]] ], [ [[NEXT_PTR:%.*]], [[WHILE_BODY]] ]
; CHECK-NEXT: [[NEXT:%.*]] = bitcast i32* [[PHI_PTR]] to i16*
; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr i16, i16* [[NEXT]], i64 1
; CHECK-NEXT: [[SRC_PTR2:%.*]] = bitcast i16* [[SRC_PTR]] to i32*
; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[SRC_PTR2]], align 2
; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr i32, i32* [[PHI_PTR]], i64 0
; CHECK-NEXT: store i32 [[VAL]], i32* [[DST_PTR]], align 4
; CHECK-NEXT: [[NEXT_PTR]] = getelementptr i32, i32* [[PHI_PTR]], i64 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[NEXT_PTR]], [[END_PTR]]
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[WHILE_BODY]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  %end.idx = add i64 %size, -1
  %end.ptr = getelementptr inbounds i32, i32* %s, i64 %end.idx
  br label %while.body

while.body: ; preds = %while.body, %entry
  %phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
  %next = bitcast i32* %phi.ptr to i16*
  %src.ptr = getelementptr i16, i16* %next, i64 1
  %src.ptr2 = bitcast i16* %src.ptr to i32*
; The underaligned load below overlaps the store that follows it.
  %val = load i32, i32* %src.ptr2, align 2
  %dst.ptr = getelementptr i32, i32* %phi.ptr, i64 0
  store i32 %val, i32* %dst.ptr, align 4
  %next.ptr = getelementptr i32, i32* %phi.ptr, i64 1
  %cmp = icmp eq i32* %next.ptr, %end.ptr
  br i1 %cmp, label %exit, label %while.body

exit: ; preds = %while.body
  ret void
}
|
|
|
|
; memcpy intrinsic, declared explicitly because @do_not_form_memmove4 calls it
; directly from its loop body.
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg)
|
|
|
|
;; FIXME: Do not form memmove from loop body containing memcpy.
;; The loop body is a single 1-byte llvm.memcpy call from Src[i + 1] to Src[i].
;; The checks expect that call to stay in the loop and nothing to be added to
;; bb.nph.
define void @do_not_form_memmove4(i8* %Src, i64 %Size) {
; CHECK-LABEL: @do_not_form_memmove4(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 [[STEP]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DESTI]], i8* align 1 [[SRCI]], i64 1, i1 false)
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
bb.nph:
  br label %for.body

for.body: ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %Step = add nuw nsw i64 %indvar, 1
  %SrcI = getelementptr i8, i8* %Src, i64 %Step
  %DestI = getelementptr i8, i8* %Src, i64 %indvar
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false)
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body
  ret void
}
|
|
|
|
;; Do not form memmove when load has more than one use.
;; The value loaded from p[index - 1] is stored to p[index] and is also added
;; into the running %sum that the function returns. The checks expect the load
;; and store to remain in the loop.
define i32 @do_not_form_memmove5(i32* %p) {
; CHECK-LABEL: @do_not_form_memmove5(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: ret i32 [[ADD_LCSSA]]
; CHECK: for.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 15, [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD]], [[FOR_BODY]] ]
; CHECK-NEXT: [[SUB]] = add nsw i32 [[INDEX]], -1
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SUB]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[INDEX]] to i64
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 [[IDXPROM]]
; CHECK-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP1]], [[SUM]]
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[INDEX]], 1
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
;
entry:
  br label %for.body

for.cond.cleanup: ; preds = %for.body
  %add.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %add.lcssa

for.body: ; preds = %entry, %for.body
  %index = phi i32 [ 15, %entry ], [ %sub, %for.body ]
  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %sub = add nsw i32 %index, -1
  %0 = zext i32 %sub to i64
  %arrayidx = getelementptr inbounds i32, i32* %p, i64 %0
  %1 = load i32, i32* %arrayidx, align 4
  %idxprom = zext i32 %index to i64
  %arrayidx2 = getelementptr inbounds i32, i32* %p, i64 %idxprom
  store i32 %1, i32* %arrayidx2, align 4
  %add = add nsw i32 %1, %sum
  %cmp = icmp sgt i32 %index, 1
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}
|
|
|
|
;; Memcpy formation is still preferred over memmove.
;;
;; %Src and %Dest are both marked noalias. The loop copies one byte per
;; iteration from Src[indvar + 42] to Dest[indvar] and exits once
;; indvar + 1 equals %Size. The FileCheck assertions below expect a single
;; llvm.memcpy of %Size bytes from Src + 42 to Dest in the preheader, and
;; the per-iteration store to be gone from the loop body (the load remains).
define void @prefer_memcpy_over_memmove(i8* noalias %Src, i8* noalias %Dest, i64 %Size) {
; CHECK-LABEL: @prefer_memcpy_over_memmove(
; CHECK-NEXT:  bb.nph:
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 42
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST:%.*]], i8* align 1 [[SCEVGEP]], i64 [[SIZE:%.*]], i1 false)
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 42
; CHECK-NEXT:    [[SRCI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[STEP]]
; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i8, i8* [[DEST]], i64 [[INDVAR]]
; CHECK-NEXT:    [[V:%.*]] = load i8, i8* [[SRCI]], align 1
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  ; The source index runs 42 elements ahead of the destination index.
  %Step = add nuw nsw i64 %indvar, 42
  %SrcI = getelementptr i8, i8* %Src, i64 %Step
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load i8, i8* %SrcI, align 1
  store i8 %V, i8* %DestI, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
|
|
|
|
; Validate that "memset_pattern" has the proper attributes.
|