This patch bails out early if the minimum loop-nest depth is not met. As it stands today, the pass computes CacheCost before it attempts the transform, which is unnecessary when the minimum depth is not met. The early exit handles the basic cases where the depth is typically 1. Because the patch avoids this unnecessary computation, it is intended to improve compile time.
66 lines
2.1 KiB
LLVM
66 lines
2.1 KiB
LLVM
; The opt invocation below passes -debug, whose output is only produced by
; builds with assertions enabled, hence the asserts requirement.
; REQUIRES: asserts

; RUN: opt < %s -passes=loop-interchange -debug -disable-output 2>&1 | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"

; @N is the loop bound read by @foo; @a, @b and @c hold the base pointers of
; the arrays that @foo indexes.
@N = dso_local global i32 0, align 4
@a = dso_local global ptr null, align 8
@b = dso_local global ptr null, align 8
@c = dso_local global ptr null, align 8

; Loop interchange should not run delinearization for a nest that contains
; only one loop; it should bail out early instead. The negative checks below
; reject the debug lines that delinearization would print, and the final
; positive check expects the early bail-out message.

; CHECK-NOT: Delinearizing
; CHECK-NOT: Strides:
; CHECK-NOT: Terms:
; CHECK: Loop doesn't contain minimum nesting level.

; @foo contains a single, non-nested loop, roughly equivalent to:
;
;   for (unsigned i = 0; i < N; ++i)
;     a[i] = b[i] + c[i];
;
; The induction variable is kept in the %i stack slot and reloaded at every
; use. The array base pointers are reloaded from the globals @a, @b and @c on
; each iteration. %retval is initialized to 0 and never read in this function.
define void @foo() {
entry:
  %retval = alloca i32, align 4
  %i = alloca i32, align 4
  store i32 0, ptr %retval, align 4
  store i32 0, ptr %i, align 4
  br label %for.cond

; Loop header: continue while i < N (unsigned comparison).
for.cond:                                         ; preds = %for.inc, %entry
  %0 = load i32, ptr %i, align 4
  %1 = load i32, ptr @N, align 4
  %cmp = icmp ult i32 %0, %1
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond
  br label %for.end

; Loop body: a[i] = b[i] + c[i], with i zero-extended to i64 for indexing.
for.body:                                         ; preds = %for.cond
  %2 = load ptr, ptr @b, align 8
  %3 = load i32, ptr %i, align 4
  %idxprom = zext i32 %3 to i64
  %arrayidx = getelementptr inbounds nuw i32, ptr %2, i64 %idxprom
  %4 = load i32, ptr %arrayidx, align 4
  %5 = load ptr, ptr @c, align 8
  %6 = load i32, ptr %i, align 4
  %idxprom1 = zext i32 %6 to i64
  %arrayidx2 = getelementptr inbounds nuw i32, ptr %5, i64 %idxprom1
  %7 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %4, %7
  %8 = load ptr, ptr @a, align 8
  %9 = load i32, ptr %i, align 4
  %idxprom3 = zext i32 %9 to i64
  %arrayidx4 = getelementptr inbounds nuw i32, ptr %8, i64 %idxprom3
  store i32 %add, ptr %arrayidx4, align 4
  br label %for.inc

; Latch: increment i and branch back to the header.
for.inc:                                          ; preds = %for.body
  %10 = load i32, ptr %i, align 4
  %inc = add i32 %10, 1
  store i32 %inc, ptr %i, align 4
  br label %for.cond

for.end:                                          ; preds = %for.cond.cleanup
  ret void
}