Splitting basic blocks into multiple statements if there are now additional scalar dependencies gives more freedom to the scheduler, but more statements also means higher compile-time complexity. Switch to finer statement granularity, the additional compile time should be limited by the number of operations quota. The regression tests are written for the -polly-stmt-granularity=bb setting, therefore we add that flag to those tests that break with the new default. Some of the tests only fail because the statements are named differently due to a basic block resulting in multiple statements, but which are removed during simplification of statements without side-effects. Previous commits tried to reduce this effect, but it is not completely avoidable. Differential Revision: https://reviews.llvm.org/D42151 llvm-svn: 324169
69 lines
2.6 KiB
LLVM
69 lines
2.6 KiB
LLVM
; RUN: opt %loadPolly -polly-stmt-granularity=bb -polly-mse -analyze < %s | FileCheck %s
|
|
; RUN: opt %loadPolly -polly-stmt-granularity=bb -polly-mse -pass-remarks-analysis="polly-mse" -analyze < %s 2>&1| FileCheck %s --check-prefix=MSE
|
|
;
|
|
; Verify that the expansion of an array with load after store in a same statement is not done.
|
|
;
|
|
; Original source code :
|
|
;
|
|
; #define Ni 2000
|
|
; #define Nj 3000
|
|
;
|
|
; void mse(double A[Ni], double B[Nj], double C[Nj], double D[Nj]) {
|
|
; int i,j;
|
|
; for (i = 0; i < Ni; i++) {
|
|
; for (int j = 0; j<Nj; j++) {
|
|
; B[j] = j;
|
|
; C[j] = B[j];
|
|
; }
|
|
; }
|
|
; }
|
|
;
|
|
; Check that C is expanded
|
|
;
|
|
; CHECK: i64 MemRef_C_Stmt_for_body4_expanded[10000][10000]; // Element size 8
|
|
; CHECK: new: { Stmt_for_body4[i0, i1] -> MemRef_C_Stmt_for_body4_expanded[i0, i1] };
|
|
;
|
|
; Check that B is not expanded
|
|
;
|
|
; CHECK-NOT: double MemRef_B_Stmt_for_body4_expanded[10000][10000]; // Element size 8
|
|
; MSE: MemRef_B has read after write to the same element in same statement. The dependences found during analysis may be wrong because Polly is not able to handle such case for now.
|
|
;
|
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
define void @mse(double* %A, double* %B, double* %C, double* %D) {
|
|
entry:
|
|
br label %entry.split
|
|
|
|
entry.split: ; preds = %entry
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry.split, %for.inc9
|
|
%i.02 = phi i32 [ 0, %entry.split ], [ %inc10, %for.inc9 ]
|
|
br label %for.body4
|
|
|
|
for.body4: ; preds = %for.body, %for.body4
|
|
%indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body4 ]
|
|
%0 = trunc i64 %indvars.iv to i32
|
|
%conv = sitofp i32 %0 to double
|
|
%arrayidx = getelementptr inbounds double, double* %B, i64 %indvars.iv
|
|
store double %conv, double* %arrayidx, align 8
|
|
%arrayidx6 = getelementptr inbounds double, double* %B, i64 %indvars.iv
|
|
%1 = bitcast double* %arrayidx6 to i64*
|
|
%2 = load i64, i64* %1, align 8
|
|
%arrayidx8 = getelementptr inbounds double, double* %C, i64 %indvars.iv
|
|
%3 = bitcast double* %arrayidx8 to i64*
|
|
store i64 %2, i64* %3, align 8
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp ne i64 %indvars.iv.next, 10000
|
|
br i1 %exitcond, label %for.body4, label %for.inc9
|
|
|
|
for.inc9: ; preds = %for.body4
|
|
%inc10 = add nuw nsw i32 %i.02, 1
|
|
%exitcond3 = icmp ne i32 %inc10, 10000
|
|
br i1 %exitcond3, label %for.body, label %for.end11
|
|
|
|
for.end11: ; preds = %for.inc9
|
|
ret void
|
|
}
|