Files
clang-p2996/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll
Ryotaro Kasuga 91f3965be4 [LoopInterchange] Fix the vectorizable check for a loop (#133667)
In the profitability check for vectorization, the dependency matrix was
not handled correctly. This can result to make a wrong decision: It may
say "this loop can be vectorized" when in fact it cannot. The root cause
of this is that the check process early returns when it finds '=' or 'I'
in the dependency matrix. To make sure that we can actually vectorize
the loop, we need to check all the rows of the matrix. This patch fixes
the process of checking whether we can vectorize the loop or not. Now it
won't make a wrong decision for a loop that cannot be vectorized.

Related: #131130
2025-04-03 16:21:19 +09:00

106 lines
3.6 KiB
LLVM

; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
; RUN: -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=vectorize
; RUN: FileCheck -input-file %t %s
@A = dso_local global [256 x [256 x float]] zeroinitializer
@B = dso_local global [256 x [256 x float]] zeroinitializer
@C = dso_local global [256 x [256 x float]] zeroinitializer
; Check that the below loops are exchanged for vectorization.
;
; for (int i = 0; i < 256; i++) {
; for (int j = 1; j < 256; j++) {
; A[i][j] = A[i][j-1] + B[i][j];
; C[i][j] += 1;
; }
; }
;
; CHECK: --- !Passed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Interchanged
; CHECK-NEXT: Function: interchange_necessary_for_vectorization
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Loop interchanged with enclosing loop.
define void @interchange_necessary_for_vectorization() {
entry:
br label %for.i.header
for.i.header:
%i = phi i64 [ 1, %entry ], [ %i.next, %for.i.inc ]
br label %for.j.body
for.j.body:
%j = phi i64 [ 1, %for.i.header ], [ %j.next, %for.j.body ]
%j.dec = add nsw i64 %j, -1
%a.load.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 %i, i64 %j.dec
%b.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @B, i64 %i, i64 %j
%c.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @C, i64 %i, i64 %j
%a = load float, ptr %a.load.index, align 4
%b = load float, ptr %b.index, align 4
%c = load float, ptr %c.index, align 4
%add.0 = fadd float %a, %b
%a.store.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 %i, i64 %j
store float %add.0, ptr %a.store.index, align 4
%add.1 = fadd float %c, 1.0
store float %add.1, ptr %c.index, align 4
%j.next = add nuw nsw i64 %j, 1
%cmp.j = icmp eq i64 %j.next, 256
br i1 %cmp.j, label %for.i.inc, label %for.j.body
for.i.inc:
%i.next = add nuw nsw i64 %i, 1
%cmp.i = icmp eq i64 %i.next, 256
br i1 %cmp.i, label %exit, label %for.i.header
exit:
ret void
}
; Check that the following innermost loop can be vectorized so that
; interchanging is unnecessary.
;
; for (int i = 0; i < 256; i++)
; for (int j = 1; j < 256; j++)
; A[i][j-1] = A[i][j] + B[i][j];
;
; FIXME: These loops are exchanged at this time due to the problem in
; profitability heuristic calculation for vectorization.
; CHECK: --- !Passed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Interchanged
; CHECK-NEXT: Function: interchange_unnecesasry_for_vectorization
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Loop interchanged with enclosing loop.
define void @interchange_unnecesasry_for_vectorization() {
entry:
br label %for.i.header
for.i.header:
%i = phi i64 [ 1, %entry ], [ %i.next, %for.i.inc ]
br label %for.j.body
for.j.body:
%j = phi i64 [ 1, %for.i.header ], [ %j.next, %for.j.body ]
%j.dec = add nsw i64 %j, -1
%a.load.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 %i, i64 %j
%b.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @B, i64 %i, i64 %j
%a = load float, ptr %a.load.index, align 4
%b = load float, ptr %b.index, align 4
%add = fadd float %a, %b
%a.store.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 %i, i64 %j.dec
store float %add, ptr %a.store.index, align 4
%j.next = add nuw nsw i64 %j, 1
%cmp.j = icmp eq i64 %j.next, 256
br i1 %cmp.j, label %for.i.inc, label %for.j.body
for.i.inc:
%i.next = add nuw nsw i64 %i, 1
%cmp.i = icmp eq i64 %i.next, 256
br i1 %cmp.i, label %exit, label %for.i.header
exit:
ret void
}