Files
clang-p2996/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
Ryotaro Kasuga 528e408b94 [LoopInterchange] Add an option to control the cost heuristics applied (#133664)
LoopInterchange has several heuristic functions to determine if
exchanging two loops is profitable or not. Whether or not to use each
heuristic and the order in which to use them were fixed, but #125830
allows them to be changed internally at will. This patch adds a new
option to control them via the compiler option.

The previous patch also added an option to prioritize the vectorization
heuristic. This patch removes that option to avoid conflicts between it and
the newly introduced one, e.g., when both
`-loop-interchange-prioritize-vectorization=1` and
`-loop-interchange-profitabilities='cache,vectorization'` are specified.
2025-04-02 15:41:40 +09:00

82 lines
3.3 KiB
LLVM

; First invocation: no -loop-interchange-profitabilities option is passed, so
; the pass runs with its default profitability settings. The optimization
; remarks are written to %t and checked by the FileCheck command that follows.
; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
; RUN: -pass-remarks-output=%t -disable-output
; RUN: FileCheck -input-file %t --check-prefix=PROFIT-CACHE %s
; Second invocation: same pipeline, but the profitability list is given
; explicitly with `vectorize` placed first, ahead of `cache` and `instorder`.
; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
; RUN: -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=vectorize,cache,instorder
; RUN: FileCheck -input-file %t --check-prefix=PROFIT-VEC %s
; Six zero-initialized 256 x 256 float matrices used by @f: it loads from all
; six of them and stores its result into @A.
@A = dso_local global [256 x [256 x float]] zeroinitializer
@B = dso_local global [256 x [256 x float]] zeroinitializer
@C = dso_local global [256 x [256 x float]] zeroinitializer
@D = dso_local global [256 x [256 x float]] zeroinitializer
@E = dso_local global [256 x [256 x float]] zeroinitializer
@F = dso_local global [256 x [256 x float]] zeroinitializer
; Check the behavior of the LoopInterchange cost model. In the code below,
; exchanging the loops is not profitable in terms of cache locality, but it is
; necessary in order to vectorize the innermost loop.
;
;   for (int i = 0; i < 256; i++)
;     for (int j = 1; j < 256; j++)
;       A[j][i] = A[j-1][i] + B[j][i] + C[i][j] + D[i][j] + E[i][j] + F[i][j];
;
; PROFIT-CACHE: --- !Missed
; PROFIT-CACHE-NEXT: Pass: loop-interchange
; PROFIT-CACHE-NEXT: Name: InterchangeNotProfitable
; PROFIT-CACHE-NEXT: Function: f
; PROFIT-CACHE-NEXT: Args:
; PROFIT-CACHE-NEXT: - String: Interchanging loops is not considered to improve cache locality nor vectorization.
; PROFIT-CACHE-NEXT: ...
; PROFIT-VEC: --- !Passed
; PROFIT-VEC-NEXT: Pass: loop-interchange
; PROFIT-VEC-NEXT: Name: Interchanged
; PROFIT-VEC-NEXT: Function: f
; PROFIT-VEC-NEXT: Args:
; PROFIT-VEC-NEXT: - String: Loop interchanged with enclosing loop.
; PROFIT-VEC-NEXT: ...
; Two-level loop nest: %i is the outer induction variable (0..255) and %j the
; inner one (1..255). Every element address is formed with a leading `i64 0`
; index that steps through the pointer to the global itself; the following two
; indices then select the row and the column of the 256 x 256 matrix, so each
; access stays inside its global.
define void @f() {
entry:
  br label %for.i.header

; Outer loop header: %i starts at 0 and is advanced in %for.i.inc.
for.i.header:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.i.inc ]
  br label %for.j.body

; Inner loop body: %j starts at 1 so that %j.dec (j - 1) is never negative.
for.j.body:
  %j = phi i64 [ 1, %for.i.header ], [ %j.next, %for.j.body ]
  %j.dec = add nsw i64 %j, -1
  ; @A and @B are indexed [j][i]; @C, @D, @E and @F are indexed [i][j].
  %a.0.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 0, i64 %j.dec, i64 %i
  %b.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @B, i64 0, i64 %j, i64 %i
  %c.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @C, i64 0, i64 %i, i64 %j
  %d.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @D, i64 0, i64 %i, i64 %j
  %e.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @E, i64 0, i64 %i, i64 %j
  %f.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @F, i64 0, i64 %i, i64 %j
  %a.0 = load float, ptr %a.0.index, align 4
  %b = load float, ptr %b.index, align 4
  %c = load float, ptr %c.index, align 4
  %d = load float, ptr %d.index, align 4
  %e = load float, ptr %e.index, align 4
  %f = load float, ptr %f.index, align 4
  ; Sum the six loaded values, left to right.
  %add.0 = fadd float %a.0, %b
  %add.1 = fadd float %add.0, %c
  %add.2 = fadd float %add.1, %d
  %add.3 = fadd float %add.2, %e
  %add.4 = fadd float %add.3, %f
  ; The result goes to A[j][i], which the next inner iteration reads back as
  ; A[j-1][i]: the dependence on @A is carried by the inner (j) loop.
  %a.1.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 0, i64 %j, i64 %i
  store float %add.4, ptr %a.1.index, align 4
  %j.next = add nuw nsw i64 %j, 1
  %cmp.j = icmp eq i64 %j.next, 256
  br i1 %cmp.j, label %for.i.inc, label %for.j.body

; Outer loop latch: leave the nest once %i.next reaches 256.
for.i.inc:
  %i.next = add nuw nsw i64 %i, 1
  %cmp.i = icmp eq i64 %i.next, 256
  br i1 %cmp.i, label %exit, label %for.i.header

exit:
  ret void
}