The current LIR (the loop-idiom pass) does not handle a memset whose size is determined at runtime. This patch uses SCEV to check whether the PointerStrideSCEV and the MemsetSizeSCEV are equal. Before the comparison, the pass tries to fold any expression that is already protected by the loop guard. The test files `memset-runtime.ll` and `memset-runtime-debug.ll` are added. This patch deals only with the proper loop-idiom case; a follow-up patch is intended to deal with SCEVs that are unequal after folding with the loop guards. Reviewed By: lebedev.ri, Whitney Differential Revision: https://reviews.llvm.org/D107353
271 lines
12 KiB
LLVM
271 lines
12 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; REQUIRES: asserts
|
|
; RUN: opt < %s -S -debug -passes=loop-idiom 2>&1 | FileCheck %s
|
|
; The C code to generate this testcase:
|
|
; void test(int *ar, int n, int m)
|
|
; {
|
|
; long i;
|
|
; for (i=0; i<n; ++i) {
|
|
; int *arr = ar + i * m;
|
|
; memset(arr, 0, i + m * sizeof(int));
|
|
; }
|
|
; }
|
|
|
|
; Check on debug outputs...
|
|
; CHECK: loop-idiom Scanning: F[MemsetSize_LoopVariant] Countable Loop %for.body
|
|
; CHECK-NEXT: memset size is non-constant
|
|
; CHECK-NEXT: memset size is not a loop-invariant, abort
|
|
; CHECK: loop-idiom Scanning: F[MemsetSize_Stride_Mismatch] Countable Loop %for.body
|
|
; CHECK-NEXT: memset size is non-constant
|
|
; CHECK-NEXT: MemsetSizeSCEV: (4 * (sext i32 %m to i64))<nsw>
|
|
; CHECK-NEXT: PositiveStrideSCEV: (4 + (4 * (sext i32 %m to i64))<nsw>)<nsw>
|
|
; CHECK-NEXT: SCEV don't match, abort
|
|
; CHECK: loop-idiom Scanning: F[NonZeroAddressSpace] Countable Loop %for.cond1.preheader
|
|
; CHECK-NEXT: memset size is non-constant
|
|
; CHECK-NEXT: pointer is not in address space zero, abort
|
|
; CHECK: loop-idiom Scanning: F[NonAffinePointer] Countable Loop %for.body
|
|
; CHECK-NEXT: Pointer is not affine, abort
|
|
|
|
; Negative test: the memset length (%add) is the sum of the induction variable
; %i.02 and the loop-invariant %mul3, so it changes on every iteration. The
; expected debug output near the top of the file reports the size as not
; loop-invariant, and the assertions below expect the loop and its memset call
; to be left unchanged.
define void @MemsetSize_LoopVariant(i32* %ar, i32 %n, i32 %m) {
|
|
; CHECK-LABEL: @MemsetSize_LoopVariant(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[N:%.*]] to i64
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i64 0, [[CONV]]
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
|
; CHECK: for.body.lr.ph:
|
|
; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[M:%.*]] to i64
|
|
; CHECK-NEXT: [[CONV2:%.*]] = sext i32 [[M]] to i64
|
|
; CHECK-NEXT: [[MUL3:%.*]] = mul i64 [[CONV2]], 4
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
|
|
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[I_02]], [[CONV1]]
|
|
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[AR:%.*]], i64 [[MUL]]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8*
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[I_02]], [[MUL3]]
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 [[ADD]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_INC]]
|
|
; CHECK: for.inc:
|
|
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_02]], 1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC]], [[CONV]]
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]]
|
|
; CHECK: for.cond.for.end_crit_edge:
|
|
; CHECK-NEXT: br label [[FOR_END]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%conv = sext i32 %n to i64
|
|
; Loop guard: the loop body is entered only when 0 < n.
%cmp1 = icmp slt i64 0, %conv
|
|
br i1 %cmp1, label %for.body.lr.ph, label %for.end
|
|

|
for.body.lr.ph: ; preds = %entry
|
|
%conv1 = sext i32 %m to i64
|
|
%conv2 = sext i32 %m to i64
|
|
; Loop-invariant part of the memset length: m * 4.
%mul3 = mul i64 %conv2, 4
|
|
br label %for.body
|
|

|
for.body: ; preds = %for.body.lr.ph, %for.inc
|
|
%i.02 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
|
|
%mul = mul nsw i64 %i.02, %conv1
|
|
; Destination: ar + i * m, counted in i32 elements.
%add.ptr = getelementptr inbounds i32, i32* %ar, i64 %mul
|
|
%0 = bitcast i32* %add.ptr to i8*
|
|
; Memset length i + m * 4 depends on the induction variable %i.02.
%add = add nsw i64 %i.02, %mul3
|
|
call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 %add, i1 false)
|
|
br label %for.inc
|
|

|
for.inc: ; preds = %for.body
|
|
%inc = add nuw nsw i64 %i.02, 1
|
|
%cmp = icmp slt i64 %inc, %conv
|
|
br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
|
|

|
for.cond.for.end_crit_edge: ; preds = %for.inc
|
|
br label %for.end
|
|

|
for.end: ; preds = %for.cond.for.end_crit_edge, %entry
|
|
ret void
|
|
}
|
|
; void test(int *ar, int n, int m)
|
|
; {
|
|
; long i;
|
|
; for (i=0; i<n; ++i) {
|
|
; int *arr = ar + i + i * m;
|
|
; memset(arr, 0, m * sizeof(int));
|
|
; }
|
|
; }
|
|
; Negative test: the destination advances by (1 + m) i32 elements per iteration
; (two chained GEPs, by %i.02 and by %i.02 * %conv1), while the memset length is
; only m * 4 bytes (%mul4). The expected debug output near the top of the file
; shows the size SCEV and the stride SCEV differing by 4 and the pass aborting
; on the mismatch; the assertions below expect the loop to be left unchanged.
define void @MemsetSize_Stride_Mismatch(i32* %ar, i32 %n, i32 %m) {
|
|
; CHECK-LABEL: @MemsetSize_Stride_Mismatch(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[N:%.*]] to i64
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i64 0, [[CONV]]
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
|
; CHECK: for.body.lr.ph:
|
|
; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[M:%.*]] to i64
|
|
; CHECK-NEXT: [[CONV3:%.*]] = sext i32 [[M]] to i64
|
|
; CHECK-NEXT: [[MUL4:%.*]] = mul i64 [[CONV3]], 4
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
|
|
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[AR:%.*]], i64 [[I_02]]
|
|
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[I_02]], [[CONV1]]
|
|
; CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i32, i32* [[ADD_PTR]], i64 [[MUL]]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR2]] to i8*
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 [[MUL4]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_INC]]
|
|
; CHECK: for.inc:
|
|
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_02]], 1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC]], [[CONV]]
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]]
|
|
; CHECK: for.cond.for.end_crit_edge:
|
|
; CHECK-NEXT: br label [[FOR_END]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%conv = sext i32 %n to i64
|
|
; Loop guard: the loop body is entered only when 0 < n.
%cmp1 = icmp slt i64 0, %conv
|
|
br i1 %cmp1, label %for.body.lr.ph, label %for.end
|
|

|
for.body.lr.ph: ; preds = %entry
|
|
%conv1 = sext i32 %m to i64
|
|
%conv3 = sext i32 %m to i64
|
|
; Loop-invariant memset length: m * 4 bytes.
%mul4 = mul i64 %conv3, 4
|
|
br label %for.body
|
|

|
for.body: ; preds = %for.body.lr.ph, %for.inc
|
|
%i.02 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
|
|
; First offset: ar + i (the extra per-iteration element that makes the stride
; larger than the memset length).
%add.ptr = getelementptr inbounds i32, i32* %ar, i64 %i.02
|
|
%mul = mul nsw i64 %i.02, %conv1
|
|
; Second offset: (ar + i) + i * m.
%add.ptr2 = getelementptr inbounds i32, i32* %add.ptr, i64 %mul
|
|
%0 = bitcast i32* %add.ptr2 to i8*
|
|
call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 %mul4, i1 false)
|
|
br label %for.inc
|
|

|
for.inc: ; preds = %for.body
|
|
%inc = add nuw nsw i64 %i.02, 1
|
|
%cmp = icmp slt i64 %inc, %conv
|
|
br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
|
|

|
for.cond.for.end_crit_edge: ; preds = %for.inc
|
|
br label %for.end
|
|

|
for.end: ; preds = %for.cond.for.end_crit_edge, %entry
|
|
ret void
|
|
}
|
|
|
|
; Negative test: the destination pointer is in addrspace(2). The memset length
; (%0 = %m << 2) is computed once in the entry block but is not a constant. The
; expected debug output near the top of the file reports that the pointer is
; not in address space zero and aborts; the assertions below expect the loop to
; be left unchanged.
define void @NonZeroAddressSpace(i32 addrspace(2)* nocapture %ar, i64 %n, i64 %m) {
|
|
; CHECK-LABEL: @NonZeroAddressSpace(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[M:%.*]], 2
|
|
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
|
|
; CHECK: for.cond1.preheader:
|
|
; CHECK-NEXT: [[I_017:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC5:%.*]], [[FOR_INC4:%.*]] ]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[M]], [[I_017]]
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32 addrspace(2)* [[AR:%.*]], i64 [[TMP1]]
|
|
; CHECK-NEXT: [[SCEVGEP1:%.*]] = bitcast i32 addrspace(2)* [[SCEVGEP]] to i8 addrspace(2)*
|
|
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[I_017]], [[M]]
|
|
; CHECK-NEXT: call void @llvm.memset.p2i8.i64(i8 addrspace(2)* align 4 [[SCEVGEP1]], i8 0, i64 [[TMP0]], i1 false)
|
|
; CHECK-NEXT: br label [[FOR_INC4]]
|
|
; CHECK: for.inc4:
|
|
; CHECK-NEXT: [[INC5]] = add nuw nsw i64 [[I_017]], 1
|
|
; CHECK-NEXT: [[EXITCOND18_NOT:%.*]] = icmp eq i64 [[INC5]], [[N:%.*]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND18_NOT]], label [[FOR_END6:%.*]], label [[FOR_COND1_PREHEADER]]
|
|
; CHECK: for.end6:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
; Memset length in bytes: m * 4, computed outside the loop.
%0 = shl nuw i64 %m, 2
|
|
br label %for.cond1.preheader
|
|

|
for.cond1.preheader: ; preds = %for.inc4, %entry
|
|
%i.017 = phi i64 [ 0, %entry ], [ %inc5, %for.inc4 ]
|
|
%1 = mul i64 %m, %i.017
|
|
; Destination: ar + m * i, counted in i32 elements, in address space 2.
%scevgep = getelementptr i32, i32 addrspace(2)* %ar, i64 %1
|
|
%scevgep1 = bitcast i32 addrspace(2)* %scevgep to i8 addrspace(2)*
|
|
; NOTE(review): %mul has no users in this function - presumably a leftover from
; how the test input was produced; the assertions above still expect it.
%mul = mul nsw i64 %i.017, %m
|
|
call void @llvm.memset.p2i8.i64(i8 addrspace(2)* align 4 %scevgep1, i8 0, i64 %0, i1 false)
|
|
br label %for.inc4
|
|

|
for.inc4: ; preds = %for.cond1.preheader
|
|
%inc5 = add nuw nsw i64 %i.017, 1
|
|
%exitcond18.not = icmp eq i64 %inc5, %n
|
|
br i1 %exitcond18.not, label %for.end6, label %for.cond1.preheader
|
|

|
for.end6: ; preds = %for.inc4
|
|
ret void
|
|
}
|
|
|
|
; void test(int *ar, int n, int m)
|
|
; {
|
|
; long i;
|
|
; for (i=0; i<n; ++i) {
|
|
; int *arr = ar + i * m;
|
|
; memset(arr, 0, m * sizeof(int));
|
|
; ar = ar + i;
|
|
; }
|
|
; }
|
|
; Negative test: the base pointer %ar.addr.03 is itself a loop-carried phi that
; is advanced by %i.02 elements each iteration (%add.ptr4), and the memset
; destination adds a further %i.02 * %conv1 elements on top of it. The expected
; debug output near the top of the file reports the pointer as not affine and
; aborts; the assertions below expect the loop to be left unchanged.
define void @NonAffinePointer(i32* %ar, i32 %n, i32 %m) {
|
|
; CHECK-LABEL: @NonAffinePointer(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[N:%.*]] to i64
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i64 0, [[CONV]]
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
|
; CHECK: for.body.lr.ph:
|
|
; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[M:%.*]] to i64
|
|
; CHECK-NEXT: [[CONV2:%.*]] = sext i32 [[M]] to i64
|
|
; CHECK-NEXT: [[MUL3:%.*]] = mul i64 [[CONV2]], 4
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[AR_ADDR_03:%.*]] = phi i32* [ [[AR:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR4:%.*]], [[FOR_INC:%.*]] ]
|
|
; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_INC]] ]
|
|
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[I_02]], [[CONV1]]
|
|
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[AR_ADDR_03]], i64 [[MUL]]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8*
|
|
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 [[MUL3]], i1 false)
|
|
; CHECK-NEXT: [[ADD_PTR4]] = getelementptr inbounds i32, i32* [[AR_ADDR_03]], i64 [[I_02]]
|
|
; CHECK-NEXT: br label [[FOR_INC]]
|
|
; CHECK: for.inc:
|
|
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_02]], 1
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC]], [[CONV]]
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]]
|
|
; CHECK: for.cond.for.end_crit_edge:
|
|
; CHECK-NEXT: br label [[FOR_END]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%conv = sext i32 %n to i64
|
|
; Loop guard: the loop body is entered only when 0 < n.
%cmp1 = icmp slt i64 0, %conv
|
|
br i1 %cmp1, label %for.body.lr.ph, label %for.end
|
|

|
for.body.lr.ph: ; preds = %entry
|
|
%conv1 = sext i32 %m to i64
|
|
%conv2 = sext i32 %m to i64
|
|
; Loop-invariant memset length: m * 4 bytes.
%mul3 = mul i64 %conv2, 4
|
|
br label %for.body
|
|

|
for.body: ; preds = %for.body.lr.ph, %for.inc
|
|
; Loop-carried base pointer: starts at %ar and is replaced by %add.ptr4 on each
; back edge.
%ar.addr.03 = phi i32* [ %ar, %for.body.lr.ph ], [ %add.ptr4, %for.inc ]
|
|
%i.02 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
|
|
%mul = mul nsw i64 %i.02, %conv1
|
|
; Memset destination: current base + i * m, counted in i32 elements.
%add.ptr = getelementptr inbounds i32, i32* %ar.addr.03, i64 %mul
|
|
%0 = bitcast i32* %add.ptr to i8*
|
|
call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 %mul3, i1 false)
|
|
; Next base pointer: current base + i, so the per-iteration step itself grows
; with the induction variable.
%add.ptr4 = getelementptr inbounds i32, i32* %ar.addr.03, i64 %i.02
|
|
br label %for.inc
|
|

|
for.inc: ; preds = %for.body
|
|
%inc = add nuw nsw i64 %i.02, 1
|
|
%cmp = icmp slt i64 %inc, %conv
|
|
br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
|
|

|
for.cond.for.end_crit_edge: ; preds = %for.inc
|
|
br label %for.end
|
|

|
for.end: ; preds = %for.cond.for.end_crit_edge, %entry
|
|
ret void
|
|
}
|
|
|
|
; Declarations of the memset intrinsics called by the test functions in this
; file: the first takes a plain i8* destination, the second an i8 addrspace(2)*
; destination; both take an i8 fill value, an i64 length and an immediate i1
; flag.
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
|
|
declare void @llvm.memset.p2i8.i64(i8 addrspace(2)* nocapture writeonly, i8, i64, i1 immarg)
|