LoopInterchange has several heuristic functions to determine whether exchanging two loops is profitable. Which heuristics to use and the order in which to apply them used to be fixed, but #125830 allows them to be changed internally at will. This patch adds a new compiler option to control them. The previous patch also added an option to prioritize the vectorization heuristic. This patch removes that option to avoid conflicts between it and the newly introduced one, e.g., when both `-loop-interchange-prioritize-vectorization=1` and `-loop-interchange-profitabilities='cache,vectorize'` are specified.
82 lines
3.3 KiB
LLVM
82 lines
3.3 KiB
LLVM
; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
|
|
; RUN: -pass-remarks-output=%t -disable-output
|
|
; RUN: FileCheck -input-file %t --check-prefix=PROFIT-CACHE %s
|
|
|
|
; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
|
|
; RUN: -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=vectorize,cache,instorder
|
|
; RUN: FileCheck -input-file %t --check-prefix=PROFIT-VEC %s
|
|
|
|
; Six zero-initialized 256 x 256 float matrices read and written by @f.
@A = dso_local global [256 x [256 x float]] zeroinitializer
|
|
@B = dso_local global [256 x [256 x float]] zeroinitializer
|
|
@C = dso_local global [256 x [256 x float]] zeroinitializer
|
|
@D = dso_local global [256 x [256 x float]] zeroinitializer
|
|
@E = dso_local global [256 x [256 x float]] zeroinitializer
|
|
@F = dso_local global [256 x [256 x float]] zeroinitializer
|
|
|
|
; Check the behavior of the LoopInterchange cost model. In the code below,
|
|
; exchanging the loops is not profitable in terms of cache, but it is necessary
|
|
; to vectorize the innermost loop.
|
|
;
|
|
; for (int i = 0; i < 256; i++)
|
|
; for (int j = 1; j < 256; j++)
|
|
; A[j][i] = A[j-1][i] + B[j][i] + C[i][j] + D[i][j] + E[i][j] + F[i][j];
|
|
;
|
|
|
|
; PROFIT-CACHE: --- !Missed
|
|
; PROFIT-CACHE-NEXT: Pass: loop-interchange
|
|
; PROFIT-CACHE-NEXT: Name: InterchangeNotProfitable
|
|
; PROFIT-CACHE-NEXT: Function: f
|
|
; PROFIT-CACHE-NEXT: Args:
|
|
; PROFIT-CACHE-NEXT: - String: Interchanging loops is not considered to improve cache locality nor vectorization.
|
|
; PROFIT-CACHE-NEXT: ...
|
|
|
|
; PROFIT-VEC: --- !Passed
|
|
; PROFIT-VEC-NEXT: Pass: loop-interchange
|
|
; PROFIT-VEC-NEXT: Name: Interchanged
|
|
; PROFIT-VEC-NEXT: Function: f
|
|
; PROFIT-VEC-NEXT: Args:
|
|
; PROFIT-VEC-NEXT: - String: Loop interchanged with enclosing loop.
|
|
; PROFIT-VEC-NEXT: ...
|
|
; void f(): a two-level loop nest equivalent to
;
;   for (int i = 0; i < 256; i++)
;     for (int j = 1; j < 256; j++)
;       A[j][i] = A[j-1][i] + B[j][i] + C[i][j] + D[i][j] + E[i][j] + F[i][j];
;
; Every GEP below carries a leading `i64 0` index. The source element type is
; the whole [256 x [256 x float]] array, so the first index steps over entire
; arrays; it must be 0 for the following two indices to select the row and the
; column of the global being addressed.
define void @f() {
entry:
  br label %for.i.header

; Outer loop: i runs from 0 to 255.
for.i.header:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.i.inc ]
  br label %for.j.body

; Inner loop: j runs from 1 to 255, so j - 1 is always a valid row index.
for.j.body:
  %j = phi i64 [ 1, %for.i.header ], [ %j.next, %for.j.body ]
  %j.dec = add nsw i64 %j, -1

  ; A and B are indexed [j][i]; C, D, E and F are indexed [i][j].
  %a.0.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 0, i64 %j.dec, i64 %i
  %b.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @B, i64 0, i64 %j, i64 %i
  %c.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @C, i64 0, i64 %i, i64 %j
  %d.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @D, i64 0, i64 %i, i64 %j
  %e.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @E, i64 0, i64 %i, i64 %j
  %f.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @F, i64 0, i64 %i, i64 %j

  %a.0 = load float, ptr %a.0.index, align 4
  %b = load float, ptr %b.index, align 4
  %c = load float, ptr %c.index, align 4
  %d = load float, ptr %d.index, align 4
  %e = load float, ptr %e.index, align 4
  %f = load float, ptr %f.index, align 4

  ; Sum the six loaded values left to right.
  %add.0 = fadd float %a.0, %b
  %add.1 = fadd float %add.0, %c
  %add.2 = fadd float %add.1, %d
  %add.3 = fadd float %add.2, %e
  %add.4 = fadd float %add.3, %f

  ; Store to A[j][i]; the next j iteration reads this element back as A[j-1][i].
  %a.1.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 0, i64 %j, i64 %i
  store float %add.4, ptr %a.1.index, align 4

  %j.next = add nuw nsw i64 %j, 1
  %cmp.j = icmp eq i64 %j.next, 256
  br i1 %cmp.j, label %for.i.inc, label %for.j.body

; Outer-loop latch: advance i and exit once it reaches 256.
for.i.inc:
  %i.next = add nuw nsw i64 %i, 1
  %cmp.i = icmp eq i64 %i.next, 256
  br i1 %cmp.i, label %exit, label %for.i.header

exit:
  ret void
}
|