Commit dd5991cc modified the aliasing checks here to allow transforming
a memcpy where the source and destination point into the same object.
However, the change accidentally made the code skip the alias check for
other operations in the loop.
Instead of completely skipping the alias check, just skip the check for
whether the memcpy aliases itself.
Differential Revision: https://reviews.llvm.org/D126486
1620 lines
71 KiB
LLVM
1620 lines
71 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -basic-aa -loop-idiom < %s -S | FileCheck %s
; The command above runs opt with -basic-aa -loop-idiom on this file and pipes
; the textual IR (-S) into FileCheck.
|
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
|
|
|
; For @test11_pattern
; (the stored i32 1 repeated four times, i.e. a 16-byte pattern)
|
|
; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 1, i32 1, i32 1, i32 1]
|
|
|
|
; For @test13_pattern
; (the stored pointer @G repeated twice; pointers are 64-bit per the datalayout)
|
|
; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x i32*] [i32* @G, i32* @G]
|
|
|
|
; The Darwin triple matters: @test11_pattern and @test13_pattern expect calls
; to memset_pattern16.
target triple = "x86_64-apple-darwin10.0.0"
|
|
|
|
; Byte-wise zeroing of Base[0 .. Size). The assertions expect a single
; llvm.memset of Size bytes in bb.nph and no store left in the loop body.
define void @test1(i8* %Base, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test1(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[BASE:%.*]], i8 0, i64 [[SIZE:%.*]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph: ; entry block (no predecessors; this function has no %entry block)
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
|
store i8 0, i8* %I.0.014, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Make sure memset is formed for larger than 1 byte stores, and that the
|
|
; alignment of the store is preserved
; (expected length is Size << 1, i.e. two bytes per i16, and the memset keeps
; align 2).
|
|
define void @test1_i16(i16* align 2 %Base, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test1_i16(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[BASE1:%.*]] = bitcast i16* [[BASE:%.*]] to i8*
|
|
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 1
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 2 [[BASE1]], i8 0, i64 [[TMP0]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i16, i16* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph: ; entry block (no predecessors; this function has no %entry block)
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%I.0.014 = getelementptr i16, i16* %Base, i64 %indvar
|
|
store i16 0, i16* %I.0.014, align 2
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; This is a loop that was rotated but where the blocks weren't merged. This
|
|
; shouldn't perturb us.
; Here the store lives in for.body and the exit test in for.body.cont; the
; memset is still expected in bb.nph.
|
|
define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test1a(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[BASE:%.*]], i8 0, i64 [[SIZE:%.*]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY_CONT:%.*]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: br label [[FOR_BODY_CONT]]
|
|
; CHECK: for.body.cont:
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph: ; entry block (no predecessors; this function has no %entry block)
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body.cont
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
|
|
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
|
store i8 0, i8* %I.0.014, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
br label %for.body.cont
|
|
for.body.cont: ; preds = %for.body
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body.cont
|
|
ret void
|
|
}
|
|
|
|
|
|
; Stores the i32 constant 16843009 (0x01010101) to every element of Base.
; All four bytes are 0x01, and the assertions expect a memset of byte value 1
; with length Size << 2, placed in a new preheader behind the Size == 0 guard.
define void @test2(i32* %Base, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test2(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[BASE1:%.*]] = bitcast i32* [[BASE:%.*]] to i8*
|
|
; CHECK-NEXT: [[CMP10:%.*]] = icmp eq i64 [[SIZE:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
|
|
; CHECK: for.body.preheader:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[SIZE]], 2
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[BASE1]], i8 1, i64 [[TMP0]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
|
|
; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[I_011]]
|
|
; CHECK-NEXT: [[INC]] = add nsw i64 [[I_011]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end.loopexit:
|
|
; CHECK-NEXT: br label [[FOR_END]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
; Skip the loop entirely when Size is zero.
%cmp10 = icmp eq i64 %Size, 0
|
|
br i1 %cmp10, label %for.end, label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
|
|
%add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011
|
|
store i32 16843009, i32* %add.ptr.i, align 4
|
|
%inc = add nsw i64 %i.011, 1
|
|
%exitcond = icmp eq i64 %inc, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body, %entry
|
|
ret void
|
|
}
|
|
|
|
; This is a case where there is an extra may-aliased store in the loop, so we
|
|
; can't promote the memset. The assertions expect both stores to stay in the
; loop and no memset call to appear.
|
|
define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp {
|
|
; CHECK-LABEL: @test3(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[I_011]]
|
|
; CHECK-NEXT: store i32 16843009, i32* [[ADD_PTR_I]], align 4
|
|
; CHECK-NEXT: store i8 42, i8* [[MAYALIAS:%.*]], align 1
|
|
; CHECK-NEXT: [[INC]] = add nsw i64 [[I_011]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[SIZE:%.*]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
|
|
%add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011
|
|
store i32 16843009, i32* %add.ptr.i, align 4
|
|
|
|
; Store through an unrelated pointer argument that may alias %Base.
store i8 42, i8* %MayAlias
|
|
%inc = add nsw i64 %i.011, 1
|
|
%exitcond = icmp eq i64 %inc, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Make sure the first store in the loop is turned into a memset.
; The loop runs 100 iterations over Base[0 .. 100); the second store targets
; Base + 1000 and is expected to remain in the loop.
|
|
define void @test4(i8* %Base) nounwind ssp {
|
|
; CHECK-LABEL: @test4(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[BASE100:%.*]] = getelementptr i8, i8* [[BASE:%.*]], i64 1000
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[BASE]], i8 0, i64 100, i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: store i8 42, i8* [[BASE100]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 100
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph: ; entry block (no predecessors; this function has no %entry block)
|
|
; Despite its name, %Base100 is Base + 1000 bytes.
%Base100 = getelementptr i8, i8* %Base, i64 1000
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
|
store i8 0, i8* %I.0.014, align 1
|
|
|
|
;; Store beyond the range memset, should be safe to promote.
|
|
store i8 42, i8* %Base100
|
|
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, 100
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; This can't be promoted: the stored value is loop variant (it is the
; truncated induction variable), so no memset can be formed.
|
|
define void @test5(i8* %Base, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test5(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, i8* [[BASE:%.*]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[V:%.*]] = trunc i64 [[INDVAR]] to i8
|
|
; CHECK-NEXT: store i8 [[V]], i8* [[I_0_014]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph: ; entry block (no predecessors; this function has no %entry block)
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
|
|
|
; The stored value changes every iteration.
%V = trunc i64 %indvar to i8
|
|
store i8 %V, i8* %I.0.014, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
|
|
;; memcpy formation
;; Byte-wise copy between two distinct allocas. The assertions expect one
;; llvm.memcpy of Size bytes in bb.nph; the load stays in the loop but the
;; store is gone.
|
|
define void @test6(i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test6(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[BASE:%.*]] = alloca i8, i32 10000, align 1
|
|
; CHECK-NEXT: [[DEST:%.*]] = alloca i8, i32 10000, align 1
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* align 1 [[BASE]], i64 [[SIZE:%.*]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[DEST]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[I_0_014]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
%Base = alloca i8, i32 10000
|
|
%Dest = alloca i8, i32 10000
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
|
%DestI = getelementptr i8, i8* %Dest, i64 %indvar
|
|
%V = load i8, i8* %I.0.014, align 1
|
|
store i8 %V, i8* %DestI, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
;; memcpy formation, check alignment
;; The load uses align 1 and the store align 4; the expected memcpy carries
;; align 4 on the destination and align 1 on the source.
|
|
define void @test6_dest_align(i32* noalias align 1 %Base, i32* noalias align 4 %Dest, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test6_dest_align(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[DEST1:%.*]] = bitcast i32* [[DEST:%.*]] to i8*
|
|
; CHECK-NEXT: [[BASE2:%.*]] = bitcast i32* [[BASE:%.*]] to i8*
|
|
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 2
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[DEST1]], i8* align 1 [[BASE2]], i64 [[TMP0]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i32, i32* [[DEST]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[I_0_014]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
|
|
%DestI = getelementptr i32, i32* %Dest, i64 %indvar
|
|
%V = load i32, i32* %I.0.014, align 1
|
|
store i32 %V, i32* %DestI, align 4
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
;; memcpy formation, check alignment
;; Mirror of @test6_dest_align: the load uses align 4 and the store align 1;
;; the expected memcpy carries align 1 on the destination and align 4 on the
;; source.
|
|
define void @test6_src_align(i32* noalias align 4 %Base, i32* noalias align 1 %Dest, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test6_src_align(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[DEST1:%.*]] = bitcast i32* [[DEST:%.*]] to i8*
|
|
; CHECK-NEXT: [[BASE2:%.*]] = bitcast i32* [[BASE:%.*]] to i8*
|
|
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 2
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST1]], i8* align 4 [[BASE2]], i64 [[TMP0]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i32, i32* [[DEST]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[I_0_014]], align 4
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
|
|
%DestI = getelementptr i32, i32* %Dest, i64 %indvar
|
|
%V = load i32, i32* %I.0.014, align 4
|
|
store i32 %V, i32* %DestI, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
|
|
; This is a loop that was rotated but where the blocks weren't merged. This
|
|
; shouldn't perturb us.
; Unlike @test1a, the store and the exit test both sit in for.body.cont and
; for.body only holds the phi; the memset is still expected in bb.nph.
|
|
define void @test7(i8* %Base, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test7(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[BASE:%.*]], i8 0, i64 [[SIZE:%.*]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY_CONT:%.*]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY_CONT]]
|
|
; CHECK: for.body.cont:
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph: ; entry block (no predecessors; this function has no %entry block)
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body.cont
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
|
|
br label %for.body.cont
|
|
for.body.cont: ; preds = %for.body
|
|
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
|
store i8 0, i8* %I.0.014, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body.cont
|
|
ret void
|
|
}
|
|
|
|
; This loop should not be transformed: it only executes one iteration
; (the exit test compares the incremented induction variable against 1).
|
|
define void @test8(i64* %Ptr, i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test8(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[PI:%.*]] = getelementptr i64, i64* [[PTR:%.*]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: store i64 0, i64* [[PI]], align 8
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 1
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph: ; entry block (no predecessors; this function has no %entry block)
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%PI = getelementptr i64, i64* %Ptr, i64 %indvar
|
|
store i64 0, i64 *%PI
|
|
%indvar.next = add i64 %indvar, 1
|
|
; Exits after the first iteration; %Size is unused.
%exitcond = icmp eq i64 %indvar.next, 1
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Opaque external function; @test9 uses its result as a pointer that may alias
; the pointer passed in.
declare i8* @external(i8*)
|
|
|
|
;; This cannot be transformed into a memcpy, because the read-from location is
|
|
;; mutated by the loop.
;; The assertions expect the load and both stores to remain in the loop.
|
|
define void @test9(i64 %Size) nounwind ssp {
|
|
; CHECK-LABEL: @test9(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[BASE:%.*]] = alloca i8, i32 10000, align 1
|
|
; CHECK-NEXT: [[DEST:%.*]] = alloca i8, i32 10000, align 1
|
|
; CHECK-NEXT: [[BASEALIAS:%.*]] = call i8* @external(i8* [[BASE]])
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[DEST]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[I_0_014]], align 1
|
|
; CHECK-NEXT: store i8 [[V]], i8* [[DESTI]], align 1
|
|
; CHECK-NEXT: store i8 4, i8* [[BASEALIAS]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
%Base = alloca i8, i32 10000
|
|
%Dest = alloca i8, i32 10000
|
|
|
|
; %Base escapes to @external, so the returned pointer may point into it.
%BaseAlias = call i8* @external(i8* %Base)
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
|
|
%DestI = getelementptr i8, i8* %Dest, i64 %indvar
|
|
%V = load i8, i8* %I.0.014, align 1
|
|
store i8 %V, i8* %DestI, align 1
|
|
|
|
;; This store can clobber the input.
|
|
store i8 4, i8* %BaseAlias
|
|
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Two dimensional nested loop should be promoted to one big memset.
; Both loops run 100 iterations and the inner one stores a zero byte at
; X[i * 100 + j]; the assertions expect a single 10000-byte memset in entry.
|
|
define void @test10(i8* %X) nounwind ssp {
|
|
; CHECK-LABEL: @test10(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[X:%.*]], i8 0, i64 10000, i1 false)
|
|
; CHECK-NEXT: br label [[BB_NPH:%.*]]
|
|
; CHECK: bb.nph:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_INC10:%.*]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC12:%.*]], [[FOR_INC10]] ]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = mul nuw nsw i64 [[INDVAR]], 100
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[X]], i64 [[TMP0]]
|
|
; CHECK-NEXT: br label [[FOR_BODY5:%.*]]
|
|
; CHECK: for.body5:
|
|
; CHECK-NEXT: [[J_02:%.*]] = phi i32 [ 0, [[BB_NPH]] ], [ [[INC:%.*]], [[FOR_BODY5]] ]
|
|
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_04]], 100
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[J_02]], [[MUL]]
|
|
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i64 [[IDXPROM]]
|
|
; CHECK-NEXT: [[INC]] = add nsw i32 [[J_02]], 1
|
|
; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i32 [[INC]], 100
|
|
; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_INC10]], label [[FOR_BODY5]]
|
|
; CHECK: for.inc10:
|
|
; CHECK-NEXT: [[INC12]] = add nsw i32 [[I_04]], 1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC12]], 100
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END13:%.*]], label [[BB_NPH]]
|
|
; CHECK: for.end13:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %bb.nph
|
|
|
|
; Outer loop header: %i.04 is the row index.
bb.nph: ; preds = %entry, %for.inc10
|
|
%i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
|
|
br label %for.body5
|
|
|
|
; Inner loop: %j.02 is the column index.
for.body5: ; preds = %for.body5, %bb.nph
|
|
%j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]
|
|
%mul = mul nsw i32 %i.04, 100
|
|
%add = add nsw i32 %j.02, %mul
|
|
%idxprom = sext i32 %add to i64
|
|
%arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom
|
|
store i8 0, i8* %arrayidx, align 1
|
|
%inc = add nsw i32 %j.02, 1
|
|
%cmp4 = icmp eq i32 %inc, 100
|
|
br i1 %cmp4, label %for.inc10, label %for.body5
|
|
|
|
for.inc10: ; preds = %for.body5
|
|
%inc12 = add nsw i32 %i.04, 1
|
|
%cmp = icmp eq i32 %inc12, 100
|
|
br i1 %cmp, label %for.end13, label %bb.nph
|
|
|
|
for.end13: ; preds = %for.inc10
|
|
ret void
|
|
}
|
|
|
|
; On darwin10 (which is the triple in this .ll file) this loop can be turned
|
|
; into a memset_pattern call.
; The loop stores i32 1 for 10000 iterations; the assertions expect a
; memset_pattern16 call covering 40000 bytes that reads from @.memset_pattern.
|
|
; rdar://9009151
|
|
define void @test11_pattern(i32* nocapture %P) nounwind ssp {
|
|
; CHECK-LABEL: @test11_pattern(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[P1:%.*]] = bitcast i32* [[P:%.*]] to i8*
|
|
; CHECK-NEXT: call void @memset_pattern16(i8* [[P1]], i8* bitcast ([4 x i32]* @.memset_pattern to i8*), i64 40000)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 10000
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
|
|
%arrayidx = getelementptr i32, i32* %P, i64 %indvar
|
|
; The value 1 is not a repeated single byte, so a plain memset cannot express it.
store i32 1, i32* %arrayidx, align 4
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, 10000
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Store of null should turn into memset of zero.
; 10000 pointer-sized (8-byte) stores give the expected 80000-byte memset; the
; store's align 4 is carried over to the memset.
|
|
define void @test12(i32** nocapture %P) nounwind ssp {
|
|
; CHECK-LABEL: @test12(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[P1:%.*]] = bitcast i32** [[P:%.*]] to i8*
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P1]], i8 0, i64 80000, i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32*, i32** [[P]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 10000
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
|
|
%arrayidx = getelementptr i32*, i32** %P, i64 %indvar
|
|
store i32* null, i32** %arrayidx, align 4
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, 10000
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Global whose address is the value stored by @test13_pattern.
@G = global i32 5
|
|
|
|
; This store-of-address loop can be turned into a memset_pattern call.
; The loop stores @G for 10000 iterations; the assertions expect a
; memset_pattern16 call covering 80000 bytes that reads from @.memset_pattern.1.
|
|
; rdar://9009151
|
|
define void @test13_pattern(i32** nocapture %P) nounwind ssp {
|
|
; CHECK-LABEL: @test13_pattern(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[P1:%.*]] = bitcast i32** [[P:%.*]] to i8*
|
|
; CHECK-NEXT: call void @memset_pattern16(i8* [[P1]], i8* bitcast ([2 x i32*]* @.memset_pattern.1 to i8*), i64 80000)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32*, i32** [[P]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 10000
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
|
|
%arrayidx = getelementptr i32*, i32** %P, i64 %indvar
|
|
store i32* @G, i32** %arrayidx, align 4
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, 10000
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
|
|
|
|
; PR9815 - This is a partial overlap case that cannot be safely transformed
|
|
; into a memcpy.
; @g_50 is the array that @test14 both reads and writes: each iteration loads
; element t5 + 4 and stores it to element t5 + 5, so every store lands on the
; element the next iteration loads.
|
|
@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
|
|
|
|
define i32 @test14() nounwind {
|
|
; CHECK-LABEL: @test14(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[T5:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[T5]], 4
|
|
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 [[IDXPROM]]
|
|
; CHECK-NEXT: [[T2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[T5]], 5
|
|
; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[ADD4]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 [[IDXPROM5]]
|
|
; CHECK-NEXT: store i32 [[T2]], i32* [[ARRAYIDX6]], align 4
|
|
; CHECK-NEXT: [[INC]] = add nsw i32 [[T5]], 1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 2
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: [[T8:%.*]] = load i32, i32* getelementptr inbounds ([7 x i32], [7 x i32]* @g_50, i32 0, i64 6), align 4
|
|
; CHECK-NEXT: ret i32 [[T8]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.inc, %for.body.lr.ph
|
|
%t5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
|
|
%add = add nsw i32 %t5, 4
|
|
%idxprom = sext i32 %add to i64
|
|
%arrayidx = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom
|
|
%t2 = load i32, i32* %arrayidx, align 4
|
|
%add4 = add nsw i32 %t5, 5
|
|
%idxprom5 = sext i32 %add4 to i64
|
|
%arrayidx6 = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom5
|
|
store i32 %t2, i32* %arrayidx6, align 4
|
|
%inc = add nsw i32 %t5, 1
|
|
%cmp = icmp slt i32 %inc, 2
|
|
br i1 %cmp, label %for.body, label %for.end
|
|
|
|
for.end: ; preds = %for.inc
|
|
%t8 = load i32, i32* getelementptr inbounds ([7 x i32], [7 x i32]* @g_50, i32 0, i64 6), align 4
|
|
ret i32 %t8
|
|
|
|
}
|
|
|
|
; PR14241: forward element copy s[i] = s[i + 1] driven by a pointer induction
; variable (%phi.ptr) that runs from %s until it reaches %s + (%size - 1).
define void @PR14241(i32* %s, i64 %size) {
|
|
; Ensure that we don't form a memcpy for strided loops. Briefly, when we taught
|
|
; LoopIdiom about memmove and strided loops, this got miscompiled into a memcpy
|
|
; instead of a memmove. If we get the memmove transform back, this will catch
|
|
; regressions.
; The source (%phi.ptr + 1) and destination (%phi.ptr + 0) overlap inside %s;
; the expected output below has an llvm.memmove in the entry block and no
; store left in the loop.
|
|
;
|
|
; CHECK-LABEL: @PR14241(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[S1:%.*]] = bitcast i32* [[S:%.*]] to i8*
|
|
; CHECK-NEXT: [[END_IDX:%.*]] = add i64 [[SIZE:%.*]], -1
|
|
; CHECK-NEXT: [[END_PTR:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 [[END_IDX]]
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[S]], i64 1
|
|
; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
|
|
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[SIZE]], 2
|
|
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -8
|
|
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], 4
|
|
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 [[S1]], i8* align 4 [[SCEVGEP2]], i64 [[TMP4]], i1 false)
|
|
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
|
|
; CHECK: while.body:
|
|
; CHECK-NEXT: [[PHI_PTR:%.*]] = phi i32* [ [[S]], [[ENTRY:%.*]] ], [ [[NEXT_PTR:%.*]], [[WHILE_BODY]] ]
|
|
; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr inbounds i32, i32* [[PHI_PTR]], i64 1
|
|
; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[SRC_PTR]], align 4
|
|
; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr inbounds i32, i32* [[PHI_PTR]], i64 0
|
|
; CHECK-NEXT: [[NEXT_PTR]] = getelementptr inbounds i32, i32* [[PHI_PTR]], i64 1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[NEXT_PTR]], [[END_PTR]]
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[WHILE_BODY]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
|
|
entry:
|
|
%end.idx = add i64 %size, -1
|
|
%end.ptr = getelementptr inbounds i32, i32* %s, i64 %end.idx
|
|
br label %while.body
|
|
|
|
while.body:
|
|
%phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
|
|
%src.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
|
|
%val = load i32, i32* %src.ptr, align 4
|
|
%dst.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 0
|
|
store i32 %val, i32* %dst.ptr, align 4
|
|
%next.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
|
|
%cmp = icmp eq i32* %next.ptr, %end.ptr
|
|
br i1 %cmp, label %exit, label %while.body
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Recognize loops with a negative stride.
; The loop stores 0 to f[65536] down to f[0] (it continues while the index is
; > 0), i.e. 65537 i32 elements; the expected memset length is therefore
; 65537 x 4 = 262148 bytes starting at %f.
|
|
define void @test15(i32* nocapture %f) {
|
|
; CHECK-LABEL: @test15(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[F1:%.*]] = bitcast i32* [[F:%.*]] to i8*
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[F1]], i8 0, i64 262148, i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 65536, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
|
|
; CHECK: for.cond.cleanup:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
|
|
store i32 0, i32* %arrayidx, align 4
|
|
%indvars.iv.next = add nsw i64 %indvars.iv, -1
|
|
%cmp = icmp sgt i64 %indvars.iv, 0
|
|
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
|
|
|
for.cond.cleanup:
|
|
ret void
|
|
}
|
|
|
|
; Loop with a negative stride. Verify an aliasing write to f[65536] prevents
|
|
; the creation of a memset.
; Same zeroing loop as @test15, but every iteration additionally stores 1
; through %arrayidx1 (= &f[65536]), which lies inside the range being zeroed.
; The expected output keeps both stores in the loop.
|
|
define void @test16(i32* nocapture %f) {
|
|
; CHECK-LABEL: @test16(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[F:%.*]], i64 65536
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 65536, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
|
|
; CHECK: for.cond.cleanup:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%arrayidx1 = getelementptr inbounds i32, i32* %f, i64 65536
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
|
|
store i32 0, i32* %arrayidx, align 4
|
|
store i32 1, i32* %arrayidx1, align 4
|
|
%indvars.iv.next = add nsw i64 %indvars.iv, -1
|
|
%cmp = icmp sgt i64 %indvars.iv, 0
|
|
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Handle memcpy-able loops with negative stride.
; Allocates sext(%c) * 4 bytes with the noalias @malloc call, then (when
; %c != 0) copies a[c-1] down to a[0] into the new buffer. The expected output
; places a single memcpy of zext(%c) * 4 bytes in while.body.preheader and
; leaves no store in the loop.
|
|
define noalias i32* @test17(i32* nocapture readonly %a, i32 %c) {
|
|
; CHECK-LABEL: @test17(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[C:%.*]] to i64
|
|
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[CONV]], 2
|
|
; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @malloc(i64 [[MUL]])
|
|
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32*
|
|
; CHECK-NEXT: [[TOBOOL_9:%.*]] = icmp eq i32 [[C]], 0
|
|
; CHECK-NEXT: br i1 [[TOBOOL_9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
|
|
; CHECK: while.body.preheader:
|
|
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C]], -1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
|
|
; CHECK-NEXT: [[TMP3:%.*]] = shl nsw i64 [[TMP2]], 2
|
|
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64
|
|
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2
|
|
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], [[TMP5]]
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[CALL]], i64 [[TMP6]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP2]], [[TMP4]]
|
|
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP7]]
|
|
; CHECK-NEXT: [[SCEVGEP12:%.*]] = bitcast i32* [[SCEVGEP1]] to i8*
|
|
; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[C]] to i64
|
|
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[SCEVGEP]], i8* align 4 [[SCEVGEP12]], i64 [[TMP9]], i1 false)
|
|
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
|
|
; CHECK: while.body:
|
|
; CHECK-NEXT: [[DEC10_IN:%.*]] = phi i32 [ [[DEC10:%.*]], [[WHILE_BODY]] ], [ [[C]], [[WHILE_BODY_PREHEADER]] ]
|
|
; CHECK-NEXT: [[DEC10]] = add nsw i32 [[DEC10_IN]], -1
|
|
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[DEC10]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IDXPROM]]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 [[IDXPROM]]
|
|
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[DEC10]], 0
|
|
; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
|
|
; CHECK: while.end.loopexit:
|
|
; CHECK-NEXT: br label [[WHILE_END]]
|
|
; CHECK: while.end:
|
|
; CHECK-NEXT: ret i32* [[TMP0]]
|
|
;
|
|
entry:
|
|
%conv = sext i32 %c to i64
|
|
%mul = shl nsw i64 %conv, 2
|
|
%call = tail call noalias i8* @malloc(i64 %mul)
|
|
%0 = bitcast i8* %call to i32*
|
|
%tobool.9 = icmp eq i32 %c, 0
|
|
br i1 %tobool.9, label %while.end, label %while.body.preheader
|
|
|
|
while.body.preheader: ; preds = %entry
|
|
br label %while.body
|
|
|
|
while.body: ; preds = %while.body.preheader, %while.body
|
|
%dec10.in = phi i32 [ %dec10, %while.body ], [ %c, %while.body.preheader ]
|
|
%dec10 = add nsw i32 %dec10.in, -1
|
|
%idxprom = sext i32 %dec10 to i64
|
|
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
|
|
%1 = load i32, i32* %arrayidx, align 4
|
|
%arrayidx2 = getelementptr inbounds i32, i32* %0, i64 %idxprom
|
|
store i32 %1, i32* %arrayidx2, align 4
|
|
%tobool = icmp eq i32 %dec10, 0
|
|
br i1 %tobool, label %while.end.loopexit, label %while.body
|
|
|
|
while.end.loopexit: ; preds = %while.body
|
|
br label %while.end
|
|
|
|
while.end: ; preds = %while.end.loopexit, %entry
|
|
ret i32* %0
|
|
}
|
|
|
|
declare noalias i8* @malloc(i64)
|
|
|
|
; Handle memcpy-able loops with negative stride.
; Both pointer arguments are noalias. The index runs from 2047 down to 0
; (2048 i32 elements), so the expected memcpy length is 2048 x 4 = 8192 bytes.
|
|
; void test18(unsigned *__restrict__ a, unsigned *__restrict__ b) {
|
|
; for (int i = 2047; i >= 0; --i) {
|
|
; a[i] = b[i];
|
|
; }
|
|
; }
|
|
define void @test18(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) #0 {
|
|
; CHECK-LABEL: @test18(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[A1:%.*]] = bitcast i32* [[A:%.*]] to i8*
|
|
; CHECK-NEXT: [[B2:%.*]] = bitcast i32* [[B:%.*]] to i8*
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[A1]], i8* align 4 [[B2]], i64 8192, i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 2047, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
|
|
; CHECK: for.cond.cleanup:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%indvars.iv = phi i64 [ 2047, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
|
store i32 %0, i32* %arrayidx2, align 4
|
|
%indvars.iv.next = add nsw i64 %indvars.iv, -1
|
|
%cmp = icmp sgt i64 %indvars.iv, 0
|
|
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Two dimensional nested loop with negative stride should be promoted to one big memset.
; The nest stores 0 to X[i * 100 + j] with i and j each running from 99 down
; to 0, i.e. 100 x 100 = 10000 bytes; the expected output is a single memset of
; 10000 bytes at %X in the entry block.
|
|
define void @test19(i8* nocapture %X) {
|
|
; CHECK-LABEL: @test19(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[X:%.*]], i8 0, i64 10000, i1 false)
|
|
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
|
|
; CHECK: for.cond1.preheader:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_INC4:%.*]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ 99, [[ENTRY]] ], [ [[DEC5:%.*]], [[FOR_INC4]] ]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = mul nsw i64 [[INDVAR]], -100
|
|
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 9900
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[X]], i64 [[TMP1]]
|
|
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_06]], 100
|
|
; CHECK-NEXT: br label [[FOR_BODY3:%.*]]
|
|
; CHECK: for.body3:
|
|
; CHECK-NEXT: [[J_05:%.*]] = phi i32 [ 99, [[FOR_COND1_PREHEADER]] ], [ [[DEC:%.*]], [[FOR_BODY3]] ]
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[J_05]], [[MUL]]
|
|
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i64 [[IDXPROM]]
|
|
; CHECK-NEXT: [[DEC]] = add nsw i32 [[J_05]], -1
|
|
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[J_05]], 0
|
|
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_INC4]]
|
|
; CHECK: for.inc4:
|
|
; CHECK-NEXT: [[DEC5]] = add nsw i32 [[I_06]], -1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[I_06]], 0
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND1_PREHEADER]], label [[FOR_END6:%.*]]
|
|
; CHECK: for.end6:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.cond1.preheader
|
|
|
|
for.cond1.preheader: ; preds = %entry, %for.inc4
|
|
%i.06 = phi i32 [ 99, %entry ], [ %dec5, %for.inc4 ]
|
|
%mul = mul nsw i32 %i.06, 100
|
|
br label %for.body3
|
|
|
|
for.body3: ; preds = %for.cond1.preheader, %for.body3
|
|
%j.05 = phi i32 [ 99, %for.cond1.preheader ], [ %dec, %for.body3 ]
|
|
%add = add nsw i32 %j.05, %mul
|
|
%idxprom = sext i32 %add to i64
|
|
%arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom
|
|
store i8 0, i8* %arrayidx, align 1
|
|
%dec = add nsw i32 %j.05, -1
|
|
%cmp2 = icmp sgt i32 %j.05, 0
|
|
br i1 %cmp2, label %for.body3, label %for.inc4
|
|
|
|
for.inc4: ; preds = %for.body3
|
|
%dec5 = add nsw i32 %i.06, -1
|
|
%cmp = icmp sgt i32 %i.06, 0
|
|
br i1 %cmp, label %for.cond1.preheader, label %for.end6
|
|
|
|
for.end6: ; preds = %for.inc4
|
|
ret void
|
|
}
|
|
|
|
; Handle loops where the trip count is a narrow integer that needs to be
|
|
; extended.
; %size is i32 while the memset length operand is i64. The loop (entered only
; when %size > 0) zeroes ptr[0] .. ptr[size-1]; the expected length is
; zext(%size) << 3, i.e. 8 bytes per i64 element.
|
|
define void @form_memset_narrow_size(i64* %ptr, i32 %size) {
|
|
; CHECK-LABEL: @form_memset_narrow_size(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[PTR1:%.*]] = bitcast i64* [[PTR:%.*]] to i8*
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP_PH:%.*]], label [[EXIT:%.*]]
|
|
; CHECK: loop.ph:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SIZE]] to i64
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[PTR1]], i8 0, i64 [[TMP1]], i1 false)
|
|
; CHECK-NEXT: br label [[LOOP_BODY:%.*]]
|
|
; CHECK: loop.body:
|
|
; CHECK-NEXT: [[STOREMERGE4:%.*]] = phi i32 [ 0, [[LOOP_PH]] ], [ [[INC:%.*]], [[LOOP_BODY]] ]
|
|
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[STOREMERGE4]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 [[IDXPROM]]
|
|
; CHECK-NEXT: [[INC]] = add nsw i32 [[STOREMERGE4]], 1
|
|
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[INC]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_BODY]], label [[LOOP_EXIT:%.*]]
|
|
; CHECK: loop.exit:
|
|
; CHECK-NEXT: br label [[EXIT]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%cmp1 = icmp sgt i32 %size, 0
|
|
br i1 %cmp1, label %loop.ph, label %exit
|
|
|
|
loop.ph:
|
|
br label %loop.body
|
|
|
|
loop.body:
|
|
%storemerge4 = phi i32 [ 0, %loop.ph ], [ %inc, %loop.body ]
|
|
%idxprom = sext i32 %storemerge4 to i64
|
|
%arrayidx = getelementptr inbounds i64, i64* %ptr, i64 %idxprom
|
|
store i64 0, i64* %arrayidx, align 8
|
|
%inc = add nsw i32 %storemerge4, 1
|
|
%cmp2 = icmp slt i32 %inc, %size
|
|
br i1 %cmp2, label %loop.body, label %loop.exit
|
|
|
|
loop.exit:
|
|
br label %exit
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Narrow (i32) trip count, memcpy flavour: when %size > 0 the loop copies
; src[0] .. src[size-1] to dst (both arguments are noalias). The expected
; output is one memcpy in loop.ph whose i64 length is zext(%size) << 3
; (8 bytes per i64 element); the load stays in the loop, the store does not.
define void @form_memcpy_narrow_size(i64* noalias %dst, i64* noalias %src, i32 %size) {
|
|
; CHECK-LABEL: @form_memcpy_narrow_size(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DST1:%.*]] = bitcast i64* [[DST:%.*]] to i8*
|
|
; CHECK-NEXT: [[SRC2:%.*]] = bitcast i64* [[SRC:%.*]] to i8*
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP_PH:%.*]], label [[EXIT:%.*]]
|
|
; CHECK: loop.ph:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SIZE]] to i64
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST1]], i8* align 8 [[SRC2]], i64 [[TMP1]], i1 false)
|
|
; CHECK-NEXT: br label [[LOOP_BODY:%.*]]
|
|
; CHECK: loop.body:
|
|
; CHECK-NEXT: [[STOREMERGE4:%.*]] = phi i32 [ 0, [[LOOP_PH]] ], [ [[INC:%.*]], [[LOOP_BODY]] ]
|
|
; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[STOREMERGE4]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[SRC]], i64 [[IDXPROM1]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8
|
|
; CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[STOREMERGE4]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[DST]], i64 [[IDXPROM2]]
|
|
; CHECK-NEXT: [[INC]] = add nsw i32 [[STOREMERGE4]], 1
|
|
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[INC]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_BODY]], label [[LOOP_EXIT:%.*]]
|
|
; CHECK: loop.exit:
|
|
; CHECK-NEXT: br label [[EXIT]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%cmp1 = icmp sgt i32 %size, 0
|
|
br i1 %cmp1, label %loop.ph, label %exit
|
|
|
|
loop.ph:
|
|
br label %loop.body
|
|
|
|
loop.body:
|
|
%storemerge4 = phi i32 [ 0, %loop.ph ], [ %inc, %loop.body ]
|
|
%idxprom1 = sext i32 %storemerge4 to i64
|
|
%arrayidx1 = getelementptr inbounds i64, i64* %src, i64 %idxprom1
|
|
%v = load i64, i64* %arrayidx1, align 8
|
|
%idxprom2 = sext i32 %storemerge4 to i64
|
|
%arrayidx2 = getelementptr inbounds i64, i64* %dst, i64 %idxprom2
|
|
store i64 %v, i64* %arrayidx2, align 8
|
|
%inc = add nsw i32 %storemerge4, 1
|
|
%cmp2 = icmp slt i32 %inc, %size
|
|
br i1 %cmp2, label %loop.body, label %loop.exit
|
|
|
|
loop.exit:
|
|
br label %exit
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
;; Memmove formation.
;; Byte loop Src[i] = Src[i + 1] with i counting up from 0 until i + 1 == %Size.
;; Source and destination both live in %Src; the expected output is a memmove
;; of %Size bytes from Src + 1 to Src, with the load left in the loop.
|
|
define void @PR46179_positive_stride(i8* %Src, i64 %Size) {
|
|
; CHECK-LABEL: @PR46179_positive_stride(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 1
|
|
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 [[SRC]], i8* align 1 [[SCEVGEP]], i64 [[SIZE:%.*]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[SRCI]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%Step = add nuw nsw i64 %indvar, 1
|
|
%SrcI = getelementptr i8, i8* %Src, i64 %Step
|
|
%DestI = getelementptr i8, i8* %Src, i64 %indvar
|
|
%V = load i8, i8* %SrcI, align 1
|
|
store i8 %V, i8* %DestI, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg)
|
|
|
|
;; Memmove formation. We expect exactly the same memmove result as in the PR46179_positive_stride output.
;; Here each iteration copies one byte from Src + i + 1 to Src + i with a
;; 1-byte llvm.memcpy call instead of a load/store pair; the expected output
;; removes that call from the loop and places a memmove of %Size bytes in bb.nph.
|
|
define void @loop_with_memcpy_PR46179_positive_stride(i8* %Src, i64 %Size) {
|
|
; CHECK-LABEL: @loop_with_memcpy_PR46179_positive_stride(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 1
|
|
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 [[SRC]], i8* align 1 [[SCEVGEP]], i64 [[SIZE:%.*]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%Step = add nuw nsw i64 %indvar, 1
|
|
%SrcI = getelementptr i8, i8* %Src, i64 %Step
|
|
%DestI = getelementptr i8, i8* %Src, i64 %indvar
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false)
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
;; Memmove formation.
;; Byte loop Src[i] = Src[i - 1] with i counting down from %Size to 1, entered
;; only when %Size > 0. The expected output is a memmove of %Size bytes from
;; Src to Src + 1 in a new preheader, with the load left in the loop.
|
|
define void @PR46179_negative_stride(i8* %Src, i64 %Size) {
|
|
; CHECK-LABEL: @PR46179_negative_stride(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
|
|
; CHECK: for.body.preheader:
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 1
|
|
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 [[SCEVGEP]], i8* align 1 [[SRC]], i64 [[SIZE]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[STEP:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
|
|
; CHECK-NEXT: [[STEP]] = add nsw i64 [[INDVAR]], -1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[SRCI]], align 1
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
|
|
; CHECK: for.end.loopexit:
|
|
; CHECK-NEXT: br label [[FOR_END]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
%cmp1 = icmp sgt i64 %Size, 0
|
|
br i1 %cmp1, label %for.body, label %for.end
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ %Step, %for.body ], [ %Size, %bb.nph ]
|
|
%Step = add nsw i64 %indvar, -1
|
|
%SrcI = getelementptr inbounds i8, i8* %Src, i64 %Step
|
|
%V = load i8, i8* %SrcI, align 1
|
|
%DestI = getelementptr inbounds i8, i8* %Src, i64 %indvar
|
|
store i8 %V, i8* %DestI, align 1
|
|
%exitcond = icmp sgt i64 %indvar, 1
|
|
br i1 %exitcond, label %for.body, label %for.end
|
|
|
|
for.end: ; preds = %for.body, %bb.nph
|
|
ret void
|
|
}
|
|
|
|
;; Memmove formation. We expect exactly the same memmove result as in the PR46179_negative_stride output.
;; Here each iteration copies one byte from Src + i - 1 to Src + i (i counting
;; down from %Size to 1) with a 1-byte llvm.memcpy call instead of a load/store
;; pair; the expected output removes that call from the loop.
|
|
define void @loop_with_memcpy_PR46179_negative_stride(i8* %Src, i64 %Size) {
|
|
; CHECK-LABEL: @loop_with_memcpy_PR46179_negative_stride(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
|
|
; CHECK: for.body.preheader:
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 1
|
|
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 [[SCEVGEP]], i8* align 1 [[SRC]], i64 [[SIZE]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[STEP:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
|
|
; CHECK-NEXT: [[STEP]] = add nsw i64 [[INDVAR]], -1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
|
|
; CHECK: for.end.loopexit:
|
|
; CHECK-NEXT: br label [[FOR_END]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
%cmp1 = icmp sgt i64 %Size, 0
|
|
br i1 %cmp1, label %for.body, label %for.end
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ %Step, %for.body ], [ %Size, %bb.nph ]
|
|
%Step = add nsw i64 %indvar, -1
|
|
%SrcI = getelementptr inbounds i8, i8* %Src, i64 %Step
|
|
%DestI = getelementptr inbounds i8, i8* %Src, i64 %indvar
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false)
|
|
%exitcond = icmp sgt i64 %indvar, 1
|
|
br i1 %exitcond, label %for.body, label %for.end
|
|
|
|
for.end: ; preds = %for.body, %bb.nph
|
|
ret void
|
|
}
|
|
|
|
;; Memmove formation.
;; Each iteration copies a 16-byte chunk from Src + i + 16 to Src + i with an
;; llvm.memcpy call, where i advances by 16 while i + 16 < %Size (signed). The
;; expected output is one memmove from Src + 16 to Src whose length is derived
;; from smax(%Size, 16).
|
|
define void @loop_with_memcpy_stride16(i8* %Src, i64 %Size) {
|
|
; CHECK-LABEL: @loop_with_memcpy_stride16(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 16
|
|
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[SIZE:%.*]], i64 16)
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
|
|
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 4
|
|
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
|
|
; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 16
|
|
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 [[SRC]], i8* align 1 [[SCEVGEP]], i64 [[TMP3]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[STEP:%.*]], [[FOR_BODY]] ], [ 0, [[BB_NPH:%.*]] ]
|
|
; CHECK-NEXT: [[STEP]] = add nuw nsw i64 [[INDVAR]], 16
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i64 [[STEP]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.body, %bb.nph
|
|
%indvar = phi i64 [ %Step, %for.body ], [ 0, %bb.nph ]
|
|
%Step = add nuw nsw i64 %indvar, 16
|
|
%SrcI = getelementptr inbounds i8, i8* %Src, i64 %Step
|
|
%DestI = getelementptr inbounds i8, i8* %Src, i64 %indvar
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 16, i1 false)
|
|
%exitcond = icmp slt i64 %Step, %Size
|
|
br i1 %exitcond, label %for.body, label %for.end
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
;; Do not form memmove from previous load when stride is positive.
;; Byte loop Src[i] = Src[i - 1] with i counting up from 1: the byte stored by
;; one iteration is the byte loaded by the next, so the expected output keeps
;; the load and the store in the loop unchanged.
;; NOTE(review): %Step is computed with 'add nuw nsw ... -1'; the 'nuw' flag on
;; an add of -1 looks unintended (the unsigned sum wraps for every
;; %indvar >= 1) -- confirm against the LangRef before relying on it.
|
|
define void @do_not_form_memmove1(i8* %Src, i64 %Size) {
|
|
; CHECK-LABEL: @do_not_form_memmove1(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 1, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], -1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[SRCI]], align 1
|
|
; CHECK-NEXT: store i8 [[V]], i8* [[DESTI]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 1, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%Step = add nuw nsw i64 %indvar, -1
|
|
%SrcI = getelementptr i8, i8* %Src, i64 %Step
|
|
%DestI = getelementptr i8, i8* %Src, i64 %indvar
|
|
%V = load i8, i8* %SrcI, align 1
|
|
store i8 %V, i8* %DestI, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
;; Do not form memmove from previous load in memcpy when stride is positive.
;; Each iteration copies one byte from Src + i - 1 to Src + i (i counting up
;; from 1) with a 1-byte llvm.memcpy call, so the byte written by one iteration
;; is the byte read by the next; the expected output keeps the call in the loop.
;; NOTE(review): %Step is computed with 'add nuw nsw ... -1'; the 'nuw' flag on
;; an add of -1 looks unintended (the unsigned sum wraps for every
;; %indvar >= 1) -- confirm against the LangRef before relying on it.
|
|
define void @do_not_form_memmove2(i8* %Src, i64 %Size) {
|
|
; CHECK-LABEL: @do_not_form_memmove2(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 1, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], -1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DESTI]], i8* align 1 [[SRCI]], i64 1, i1 false)
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 1, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%Step = add nuw nsw i64 %indvar, -1
|
|
%SrcI = getelementptr i8, i8* %Src, i64 %Step
|
|
%DestI = getelementptr i8, i8* %Src, i64 %indvar
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false)
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
;; Do not form memmove from next load when stride is negative.
;; Byte loop Src[i] = Src[i + 1] with i counting down from %Size to 1 (entered
;; only when %Size > 0): the byte stored by one iteration is the byte loaded by
;; the next, so the expected output keeps the load and the store in the loop.
|
|
define void @do_not_form_memmove3(i8* %Src, i64 %Size) {
|
|
; CHECK-LABEL: @do_not_form_memmove3(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
|
|
; CHECK: for.body.preheader:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
|
|
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[SRCI]], align 1
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: store i8 [[V]], i8* [[DESTI]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], -1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
|
|
; CHECK: for.end.loopexit:
|
|
; CHECK-NEXT: br label [[FOR_END]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
%cmp1 = icmp sgt i64 %Size, 0
|
|
br i1 %cmp1, label %for.body, label %for.end
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ %indvar.next, %for.body ], [ %Size, %bb.nph ]
|
|
%Step = add nuw nsw i64 %indvar, 1
|
|
%SrcI = getelementptr inbounds i8, i8* %Src, i64 %Step
|
|
%V = load i8, i8* %SrcI, align 1
|
|
%DestI = getelementptr inbounds i8, i8* %Src, i64 %indvar
|
|
store i8 %V, i8* %DestI, align 1
|
|
%indvar.next = add nsw i64 %indvar, -1
|
|
%exitcond = icmp sgt i64 %indvar, 1
|
|
br i1 %exitcond, label %for.body, label %for.end
|
|
|
|
for.end: ; preds = %for.body, %bb.nph
|
|
ret void
|
|
}
|
|
|
|
;; Do not form memmove from next load in memcpy when stride is negative.
;; The loop counts down from %Size and every iteration copies a single byte
;; from Src[indvar + 1] to Src[indvar] with a 1-byte memcpy, so each iteration
;; reads the byte that the iteration before it has just written. The
;; assertions expect the per-iteration memcpy to remain in the loop.
|
|
define void @do_not_form_memmove4(i8* %Src, i64 %Size) {
|
|
; CHECK-LABEL: @do_not_form_memmove4(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
|
|
; CHECK: for.body.preheader:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
|
|
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DESTI]], i8* align 1 [[SRCI]], i64 1, i1 false)
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], -1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
|
|
; CHECK: for.end.loopexit:
|
|
; CHECK-NEXT: br label [[FOR_END]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
; Skip the loop entirely when %Size is not positive.
%cmp1 = icmp sgt i64 %Size, 0
|
|
br i1 %cmp1, label %for.body, label %for.end
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ %indvar.next, %for.body ], [ %Size, %bb.nph ]
|
|
; Source index leads the destination index by one element.
%Step = add nuw nsw i64 %indvar, 1
|
|
%SrcI = getelementptr inbounds i8, i8* %Src, i64 %Step
|
|
%DestI = getelementptr inbounds i8, i8* %Src, i64 %indvar
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false)
|
|
%indvar.next = add nsw i64 %indvar, -1
|
|
%exitcond = icmp sgt i64 %indvar, 1
|
|
br i1 %exitcond, label %for.body, label %for.end
|
|
|
|
for.end: ; preds = %for.body, %bb.nph
|
|
ret void
|
|
}
|
|
|
|
;; Do not form memmove when underaligned load is overlapped with store.
;; The loop walks an i32 pointer forward one element per iteration. Each
;; iteration loads an i32 (align 2) from two bytes past the current element
;; and stores it (align 4) to the current element, so the loaded bytes
;; straddle the current and the next element. The assertions expect the
;; scalar load/store pair to remain in the loop.
|
|
define void @do_not_form_memmove5(i32* %s, i64 %size) {
|
|
; CHECK-LABEL: @do_not_form_memmove5(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[END_IDX:%.*]] = add i64 [[SIZE:%.*]], -1
|
|
; CHECK-NEXT: [[END_PTR:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 [[END_IDX]]
|
|
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
|
|
; CHECK: while.body:
|
|
; CHECK-NEXT: [[PHI_PTR:%.*]] = phi i32* [ [[S]], [[ENTRY:%.*]] ], [ [[NEXT_PTR:%.*]], [[WHILE_BODY]] ]
|
|
; CHECK-NEXT: [[NEXT:%.*]] = bitcast i32* [[PHI_PTR]] to i16*
|
|
; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr i16, i16* [[NEXT]], i64 1
|
|
; CHECK-NEXT: [[SRC_PTR2:%.*]] = bitcast i16* [[SRC_PTR]] to i32*
|
|
; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[SRC_PTR2]], align 2
|
|
; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr i32, i32* [[PHI_PTR]], i64 0
|
|
; CHECK-NEXT: store i32 [[VAL]], i32* [[DST_PTR]], align 4
|
|
; CHECK-NEXT: [[NEXT_PTR]] = getelementptr i32, i32* [[PHI_PTR]], i64 1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[NEXT_PTR]], [[END_PTR]]
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[WHILE_BODY]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
; The loop stops once the advanced pointer reaches element %size - 1.
%end.idx = add i64 %size, -1
|
|
%end.ptr = getelementptr inbounds i32, i32* %s, i64 %end.idx
|
|
br label %while.body
|
|
|
|
while.body:
|
|
%phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
|
|
; Reinterpret as i16* so the source can be offset by one i16 (two bytes).
%next = bitcast i32* %phi.ptr to i16*
|
|
%src.ptr = getelementptr i16, i16* %next, i64 1
|
|
%src.ptr2 = bitcast i16* %src.ptr to i32*
|
|
; The underaligned load below overlaps the store that follows it.
|
|
%val = load i32, i32* %src.ptr2, align 2
|
|
%dst.ptr = getelementptr i32, i32* %phi.ptr, i64 0
|
|
store i32 %val, i32* %dst.ptr, align 4
|
|
%next.ptr = getelementptr i32, i32* %phi.ptr, i64 1
|
|
%cmp = icmp eq i32* %next.ptr, %end.ptr
|
|
br i1 %cmp, label %exit, label %while.body
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
;; Do not form memmove for memcpy with aliasing store.
;; Every iteration copies a single byte from Src[indvar + 1] to Src[indvar]
;; with a 1-byte memcpy and then stores through %BaseAlias, a pointer obtained
;; by passing %Src to @external before the loop. The assertions expect both
;; the memcpy and the store to remain in the loop.
;; NOTE(review): @external is declared outside this part of the file;
;; presumably its result is treated as possibly aliasing %Src -- confirm.
|
|
define void @do_not_form_memmove6(i8* %Src, i64 %Size) {
|
|
; CHECK-LABEL: @do_not_form_memmove6(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[BASEALIAS:%.*]] = call i8* @external(i8* [[SRC:%.*]])
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DESTI]], i8* align 1 [[SRCI]], i64 1, i1 false)
|
|
; CHECK-NEXT: store i8 4, i8* [[BASEALIAS]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
%BaseAlias = call i8* @external(i8* %Src)
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
%Step = add nuw nsw i64 %indvar, 1
|
|
%SrcI = getelementptr i8, i8* %Src, i64 %Step
|
|
%DestI = getelementptr i8, i8* %Src, i64 %indvar
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false)
|
|
; Extra store in the loop body, through the pointer returned by @external.
store i8 4, i8* %BaseAlias
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
;; Do not form memmove when load has more than one use.
;; The loop counts %index down from 15. Every iteration loads p[index - 1],
;; stores it to p[index], and also adds the loaded value to a running sum
;; that is returned after the loop. The assertions expect the scalar
;; load/store pair to remain in the loop.
|
|
define i32 @do_not_form_memmove7(i32* %p) {
|
|
; CHECK-LABEL: @do_not_form_memmove7(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.cond.cleanup:
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: ret i32 [[ADD_LCSSA]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 15, [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[SUB]] = add nsw i32 [[INDEX]], -1
|
|
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SUB]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 [[TMP0]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[INDEX]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 [[IDXPROM]]
|
|
; CHECK-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP1]], [[SUM]]
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[INDEX]], 1
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
%add.lcssa = phi i32 [ %add, %for.body ]
|
|
ret i32 %add.lcssa
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%index = phi i32 [ 15, %entry ], [ %sub, %for.body ]
|
|
%sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
|
|
%sub = add nsw i32 %index, -1
|
|
%0 = zext i32 %sub to i64
|
|
%arrayidx = getelementptr inbounds i32, i32* %p, i64 %0
|
|
%1 = load i32, i32* %arrayidx, align 4
|
|
%idxprom = zext i32 %index to i64
|
|
%arrayidx2 = getelementptr inbounds i32, i32* %p, i64 %idxprom
|
|
store i32 %1, i32* %arrayidx2, align 4
|
|
; Second use of the loaded value, in addition to the store above.
%add = add nsw i32 %1, %sum
|
|
%cmp = icmp sgt i32 %index, 1
|
|
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
|
}
|
|
|
|
;; Do not form memmove when there's an aliasing operation, even
|
|
;; if the memcpy source and destination are in the same object.
;; %p2 points 1000 i64 elements past %p. Every iteration first stores the
;; constant 1 to p2[i] and then copies those same 8 bytes from p2[i] to p[i]
;; with a memcpy, so the store writes the location the memcpy reads. The
;; assertions expect both the store and the memcpy to remain in the loop.
|
|
define void @do_not_form_memmove8(i64* %p) {
|
|
; CHECK-LABEL: @do_not_form_memmove8(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 1000
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[X4:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X13:%.*]], [[LOOP]] ]
|
|
; CHECK-NEXT: [[X5:%.*]] = zext i32 [[X4]] to i64
|
|
; CHECK-NEXT: [[X7:%.*]] = getelementptr inbounds i64, i64* [[P2]], i64 [[X5]]
|
|
; CHECK-NEXT: [[X8:%.*]] = bitcast i64* [[X7]] to i8*
|
|
; CHECK-NEXT: store i64 1, i64* [[X7]], align 4
|
|
; CHECK-NEXT: [[X11:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[X5]]
|
|
; CHECK-NEXT: [[X12:%.*]] = bitcast i64* [[X11]] to i8*
|
|
; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[X12]], i8* [[X8]], i64 8, i1 false)
|
|
; CHECK-NEXT: [[X13]] = add i32 [[X4]], 1
|
|
; CHECK-NEXT: [[X14:%.*]] = icmp eq i32 [[X13]], 44
|
|
; CHECK-NEXT: br i1 [[X14]], label [[EXIT:%.*]], label [[LOOP]]
|
|
;
|
|
entry:
|
|
%p2 = getelementptr inbounds i64, i64* %p, i64 1000
|
|
br label %loop
|
|
|
|
exit:
|
|
ret void
|
|
|
|
loop:
|
|
%x4 = phi i32 [ 0, %entry ], [ %x13, %loop ]
|
|
%x5 = zext i32 %x4 to i64
|
|
%x7 = getelementptr inbounds i64, i64* %p2, i64 %x5
|
|
%x8 = bitcast i64* %x7 to i8*
|
|
; This store writes the slot that the memcpy below uses as its source.
store i64 1, i64* %x7, align 4
|
|
%x11 = getelementptr inbounds i64, i64* %p, i64 %x5
|
|
%x12 = bitcast i64* %x11 to i8*
|
|
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %x12, i8* %x8, i64 8, i1 false)
|
|
%x13 = add i32 %x4, 1
|
|
; The loop runs for a fixed 44 iterations.
%x14 = icmp eq i32 %x13, 44
|
|
br i1 %x14, label %exit, label %loop
|
|
}
|
|
|
|
;; Memcpy formation is still preferred over memmove.
;; %Src and %Dest are both marked noalias. Every iteration loads
;; Src[indvar + 42] and stores it to Dest[indvar]. The assertions expect a
;; single memcpy of %Size bytes from Src + 42 to Dest in the preheader block,
;; with the load left behind in the loop and the store gone.
|
|
define void @prefer_memcpy_over_memmove(i8* noalias %Src, i8* noalias %Dest, i64 %Size) {
|
|
; CHECK-LABEL: @prefer_memcpy_over_memmove(
|
|
; CHECK-NEXT: bb.nph:
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 42
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST:%.*]], i8* align 1 [[SCEVGEP]], i64 [[SIZE:%.*]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 42
|
|
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[STEP]]
|
|
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[DEST]], i64 [[INDVAR]]
|
|
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[SRCI]], align 1
|
|
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb.nph:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %bb.nph, %for.body
|
|
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
|
|
; Source index is the destination index plus a constant offset of 42.
%Step = add nuw nsw i64 %indvar, 42
|
|
%SrcI = getelementptr i8, i8* %Src, i64 %Step
|
|
%DestI = getelementptr i8, i8* %Dest, i64 %indvar
|
|
%V = load i8, i8* %SrcI, align 1
|
|
store i8 %V, i8* %DestI, align 1
|
|
%indvar.next = add i64 %indvar, 1
|
|
%exitcond = icmp eq i64 %indvar.next, %Size
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Validate that "memset_pattern" has the proper attributes.
|