The current LIR (loop-idiom recognition) pass does not handle a memset whose size is determined at runtime. This patch uses SCEV to check whether the PointerStrideSCEV and the MemsetSizeSCEV are equal. Before the comparison, the pass tries to fold each expression using the conditions already guaranteed by the loop guard. The test files `memset-runtime.ll` and `memset-runtime-debug.ll` are added. This patch handles the proper loop idiom; a follow-up patch is intended to handle SCEVs that are still unequal after folding with the loop guards. Reviewed By: lebedev.ri, Whitney Differential Revision: https://reviews.llvm.org/D107353
111 lines
4.0 KiB
LLVM
111 lines
4.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes="function(loop(loop-idiom,loop-deletion),simplifycfg)" -S < %s | FileCheck %s
; For_PositiveStride: a loop that calls memset with a runtime-determined size
; on a pointer that advances by exactly that size on every iteration.
;
; The C code to generate this testcase:
; void test(int n, int m, int *ar)
; {
;   long i;
;   for (i=0; i<n; ++i) {
;     int *arr = ar + i * m; // ar[i];
;     memset(arr, 0, m * sizeof(int));
;   }
; }
; The optimized code should be equivalent to the following C:
; void test(int n, int m, int *ar)
; {
;   memset(ar, 0, m * n * sizeof(int));
; }
;
; Note: the IR below is not a literal translation of the C sketch. Its
; parameters are ordered (ar, n, m), n and m are i64, and the loop is entered
; unconditionally from the entry block (there is no guard around it).
;
; Expected result, per the assertions: the loop disappears and a single memset
; of (m * n) << 2 bytes starting at %ar remains in the entry block.
define void @For_PositiveStride(i32* nocapture %ar, i64 %n, i64 %m) {
; CHECK-LABEL: @For_PositiveStride(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[AR1:%.*]] = bitcast i32* [[AR:%.*]] to i8*
; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i64 [[M:%.*]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[M]], [[N:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP1]], 2
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[AR1]], i8 0, i64 [[TMP2]], i1 false)
; CHECK-NEXT:    ret void
;
entry:
  ; Loop-invariant memset size in bytes: m * 4 (four bytes per i32 element).
  %0 = shl nuw i64 %m, 2
  br label %for.cond1.preheader

for.cond1.preheader:                              ; preds = %for.inc4, %entry
  ; Induction variable i: starts at 0 and is incremented in %for.inc4.
  %i.017 = phi i64 [ 0, %entry ], [ %inc5, %for.inc4 ]
  ; Destination pointer for this iteration: ar + i * m (in i32 elements).
  %1 = mul i64 %m, %i.017
  %scevgep = getelementptr i32, i32* %ar, i64 %1
  %scevgep1 = bitcast i32* %scevgep to i8*
  ; %mul is not used by any other instruction in this function.
  %mul = mul nsw i64 %i.017, %m
  ; Zero m * 4 bytes; the per-iteration pointer stride equals this size.
  call void @llvm.memset.p0i8.i64(i8* align 4 %scevgep1, i8 0, i64 %0, i1 false)
  br label %for.inc4

for.inc4:                                         ; preds = %for.cond1.preheader
  %inc5 = add nuw nsw i64 %i.017, 1
  ; Leave the loop once the incremented counter equals n.
  %exitcond18.not = icmp eq i64 %inc5, %n
  br i1 %exitcond18.not, label %for.end6, label %for.cond1.preheader

for.end6:                                         ; preds = %for.inc4
  ret void
}

; For_NegativeStride: same idiom as above, but the loop counts down, so the
; destination pointer moves backwards by the memset size on every iteration.
;
; The C code to generate this testcase:
; void test(int n, int m, int *ar)
; {
;   long i;
;   for (i=n-1; i>=0; i--) {
;     int *arr = ar + i * m;
;     memset(arr, 0, m * sizeof(int));
;   }
; }
;
; Note: the IR parameters are ordered (ar, n, m), unlike the C sketch.
;
; Expected result, per the assertions: the entry-block guard (n - 1 >= 0) is
; kept, and the loop is replaced in %for.body.lr.ph by a single memset that
; starts at %ar and covers ((sext(n - 1) - (-1)) * sext(m)) << 2 bytes.
define void @For_NegativeStride(i32* %ar, i32 %n, i32 %m) {
; CHECK-LABEL: @For_NegativeStride(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[AR1:%.*]] = bitcast i32* [[AR:%.*]] to i8*
; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[N:%.*]], 1
; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[SUB]] to i64
; CHECK-NEXT:    [[CMP1:%.*]] = icmp sge i64 [[CONV]], 0
; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
; CHECK:       for.body.lr.ph:
; CHECK-NEXT:    [[CONV1:%.*]] = sext i32 [[M:%.*]] to i64
; CHECK-NEXT:    [[CONV2:%.*]] = sext i32 [[M]] to i64
; CHECK-NEXT:    [[MUL3:%.*]] = mul i64 [[CONV2]], 4
; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[CONV]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[CONV1]], [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP1]], 2
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[AR1]], i8 0, i64 [[TMP2]], i1 false)
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  ; Initial counter value n - 1, widened to i64.
  %sub = sub nsw i32 %n, 1
  %conv = sext i32 %sub to i64
  ; Loop guard: skip the loop entirely when n - 1 is negative.
  %cmp1 = icmp sge i64 %conv, 0
  br i1 %cmp1, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  ; Two identical sign-extensions of m: %conv1 feeds the element index,
  ; %conv2 feeds the byte size.
  %conv1 = sext i32 %m to i64
  %conv2 = sext i32 %m to i64
  ; Loop-invariant memset size in bytes: m * 4.
  %mul3 = mul i64 %conv2, 4
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.inc
  ; Induction variable i: starts at n - 1 and is decremented in %for.inc.
  %i.02 = phi i64 [ %conv, %for.body.lr.ph ], [ %dec, %for.inc ]
  ; Destination pointer for this iteration: ar + i * m (in i32 elements).
  %mul = mul nsw i64 %i.02, %conv1
  %add.ptr = getelementptr inbounds i32, i32* %ar, i64 %mul
  %0 = bitcast i32* %add.ptr to i8*
  call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 %mul3, i1 false)
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %dec = add nsw i64 %i.02, -1
  ; Keep iterating while the decremented counter is still >= 0.
  %cmp = icmp sge i64 %dec, 0
  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge

for.cond.for.end_crit_edge:                       ; preds = %for.inc
  br label %for.end

for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
  ret void
}

; Declaration of the memset intrinsic called by the test functions in this
; file. Operands as used at the call sites: destination pointer (i8*), fill
; byte (i8), length in bytes (i64), and a constant i1 flag (always false here;
; the volatile flag per the LLVM Language Reference).
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)