Files
clang-p2996/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll
Sjoerd Meijer 456ec1c2f4 [LoopInterchange] Remove 'S' Scalar Dependencies (#119345)
We are not handling 'S' scalar dependencies correctly and have at least
the following miscompiles related to that:

[LoopInterchange] incorrect handling of scalar dependencies and dependence vectors starting with ">" #54176
[LoopInterchange] Interchange breaks program correctness #46867
[LoopInterchange] Loops should not interchanged due to dependencies #47259
[LoopInterchange] Loops should not interchanged due to control flow #47401

This patch no longer inserts the "S" dependency/direction into the
dependency matrix, so a dependency is never "S". We seem to have
forgotten what the exact meaning of this dependency type is, and don't
see why it should be treated differently.

We prefer correctness over incorrect but more aggressive results. That is,
this prevents the miscompiles at the expense of handling fewer cases,
making interchange more pessimistic. However, some of the cases
that are now rejected for dependence analysis reasons were rejected
before too, but for other reasons (e.g. profitability). So at least for
the llvm regression tests, the number of regressions is very reasonable.
This should be a stopgap. We would like to get interchange enabled by
default and thus prefer correctness over unsafe transforms, and later
see if we can resolve the regressions.
2025-01-20 13:04:58 +00:00

128 lines
5.3 KiB
LLVM

; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1 | FileCheck -check-prefix=IR %s
; RUN: FileCheck --input-file=%t %s
; Both tests should be rejected as interchange candidates. For now, they are
; rejected for dependence analysis reasons, but that is only because support
; for 'S' scalar dependencies was removed. Once that support is properly
; restored, inner-loop-only reductions should still not be supported for now;
; see the discussion at D53027 for more information on the required checks.
; 500 x 500 matrix of i32, read by both @reduction_01 and @reduction_03.
@A = common global [500 x [500 x i32]] zeroinitializer
; i32 scalar: @reduction_01 loads it and stores its sum back to it;
; @reduction_03 only loads it.
@X = common global i32 0
; 500 x 500 matrix of i32, read by @reduction_03.
@B = common global [500 x [500 x i32]] zeroinitializer
; i32 scalar: @reduction_03 loads it and stores its sum back to it.
@Y = common global i32 0
;; global X
;; for( int i=1;i<N;i++)
;; for( int j=1;j<N;j++)
;; X+=A[j][i];
; CHECK: --- !Missed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: reduction_01
; IR-LABEL: @reduction_01(
; IR-NOT: split
; Two-deep loop nest that adds A[j][i] into the global @X, where i is the
; outer counter and j the inner counter. The running sum is carried by a phi
; in the inner loop only: it is seeded from @X in the inner preheader and
; stored back to @X in the outer latch.
define void @reduction_01(i32 %N) {
entry:
; Guard: the loops are entered only when N > 1 (both counters start at 1).
%cmp16 = icmp sgt i32 %N, 1
br i1 %cmp16, label %for.body3.lr.ph, label %for.end8
; Outer loop header, which also serves as the inner loop preheader.
; %indvars.iv18 is the outer counter i; @X is reloaded on every outer iteration.
for.body3.lr.ph: ; preds = %for.cond1.for.inc6_crit_edge, %entry
%indvars.iv18 = phi i64 [ %indvars.iv.next19, %for.cond1.for.inc6_crit_edge ], [ 1, %entry ]
%X.promoted = load i32, ptr @X
br label %for.body3
; Inner loop body (single block, branches back to itself).
; %indvars.iv is the inner counter j; %add15 is the running sum.
for.body3: ; preds = %for.body3, %for.body3.lr.ph
%indvars.iv = phi i64 [ 1, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
%add15 = phi i32 [ %X.promoted, %for.body3.lr.ph ], [ %add, %for.body3 ]
; Address of A[j][i]: the inner counter selects the row, the outer the column.
%arrayidx5 = getelementptr inbounds [500 x [500 x i32]], ptr @A, i64 0, i64 %indvars.iv, i64 %indvars.iv18
%0 = load i32, ptr %arrayidx5
%add = add nsw i32 %add15, %0
; Advance j and leave the inner loop once the truncated next value equals N.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %N
br i1 %exitcond, label %for.cond1.for.inc6_crit_edge, label %for.body3
; Inner loop exit and outer loop latch: write the sum back to @X, then
; advance i and leave the outer loop once the truncated next value equals N.
for.cond1.for.inc6_crit_edge: ; preds = %for.body3
%add.lcssa = phi i32 [ %add, %for.body3 ]
store i32 %add.lcssa, ptr @X
%indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1
%lftr.wideiv20 = trunc i64 %indvars.iv.next19 to i32
%exitcond21 = icmp eq i32 %lftr.wideiv20, %N
br i1 %exitcond21, label %for.end8, label %for.body3.lr.ph
; Common exit, reached from the guard or from the outer latch.
for.end8: ; preds = %for.cond1.for.inc6_crit_edge, %entry
ret void
}
;; Not tightly nested. Do not interchange.
;; for( int i=1;i<N;i++)
;; for( int j=1;j<N;j++) {
;; for( int k=1;k<N;k++) {
;; X+=A[k][j];
;; }
;; Y+=B[j][i];
;; }
;; Because the loops are not interchanged, the phis in the inner loop will not
;; be split.
; CHECK: --- !Missed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: reduction_03
; IR-LABEL: @reduction_03(
; IR-NOT: split
; Three-deep loop nest with counters i (outer), j (middle) and k (inner), all
; starting at 1. The middle loop's exit/latch block adds B[j][i] into a running
; sum that is seeded from @Y and stored back to @Y in the outer latch.
; NOTE(review): %X.promoted and the value loaded from A[k][j] (%0) have no
; uses in this function, so the "X+=A[k][j]" statement in the pseudo-code is
; not actually accumulated here - confirm this is intentional.
define void @reduction_03(i32 %N) {
entry:
; Guard: the loops are entered only when N > 1.
%cmp35 = icmp sgt i32 %N, 1
br i1 %cmp35, label %for.cond4.preheader.lr.ph, label %for.end19
; Outer loop header. %indvars.iv41 is i; @Y is reloaded on every outer iteration.
for.cond4.preheader.lr.ph: ; preds = %for.cond1.for.inc17_crit_edge, %entry
%indvars.iv41 = phi i64 [ %indvars.iv.next42, %for.cond1.for.inc17_crit_edge ], [ 1, %entry ]
%Y.promoted = load i32, ptr @Y
br label %for.body6.lr.ph
; Middle loop header, which also serves as the inner loop preheader.
; %indvars.iv37 is j; %add1334 is the running sum destined for @Y.
for.body6.lr.ph: ; preds = %for.cond4.for.end_crit_edge, %for.cond4.preheader.lr.ph
%indvars.iv37 = phi i64 [ 1, %for.cond4.preheader.lr.ph ], [ %indvars.iv.next38, %for.cond4.for.end_crit_edge ]
%add1334 = phi i32 [ %Y.promoted, %for.cond4.preheader.lr.ph ], [ %add13, %for.cond4.for.end_crit_edge ]
; Loaded but not used anywhere else in this function.
%X.promoted = load i32, ptr @X
br label %for.body6
; Inner loop body (single block, branches back to itself). %indvars.iv is k.
for.body6: ; preds = %for.body6, %for.body6.lr.ph
%indvars.iv = phi i64 [ 1, %for.body6.lr.ph ], [ %indvars.iv.next, %for.body6 ]
; Address of A[k][j]; the loaded value %0 is not used afterwards.
%arrayidx8 = getelementptr inbounds [500 x [500 x i32]], ptr @A, i64 0, i64 %indvars.iv, i64 %indvars.iv37
%0 = load i32, ptr %arrayidx8
; Advance k and leave the inner loop once the truncated next value equals N.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %N
br i1 %exitcond, label %for.cond4.for.end_crit_edge, label %for.body6
; Inner loop exit and middle loop latch. The B[j][i] load and the add sit
; here, in the middle loop but outside the inner loop.
for.cond4.for.end_crit_edge: ; preds = %for.body6
%arrayidx12 = getelementptr inbounds [500 x [500 x i32]], ptr @B, i64 0, i64 %indvars.iv37, i64 %indvars.iv41
%1 = load i32, ptr %arrayidx12
%add13 = add nsw i32 %add1334, %1
; Advance j and leave the middle loop once the truncated next value equals N.
%indvars.iv.next38 = add nuw nsw i64 %indvars.iv37, 1
%lftr.wideiv39 = trunc i64 %indvars.iv.next38 to i32
%exitcond40 = icmp eq i32 %lftr.wideiv39, %N
br i1 %exitcond40, label %for.cond1.for.inc17_crit_edge, label %for.body6.lr.ph
; Middle loop exit and outer loop latch: write the sum back to @Y, then
; advance i and leave the outer loop once the truncated next value equals N.
for.cond1.for.inc17_crit_edge: ; preds = %for.cond4.for.end_crit_edge
%add13.lcssa = phi i32 [ %add13, %for.cond4.for.end_crit_edge ]
store i32 %add13.lcssa, ptr @Y
%indvars.iv.next42 = add nuw nsw i64 %indvars.iv41, 1
%lftr.wideiv43 = trunc i64 %indvars.iv.next42 to i32
%exitcond44 = icmp eq i32 %lftr.wideiv43, %N
br i1 %exitcond44, label %for.end19, label %for.cond4.preheader.lr.ph
; Common exit, reached from the guard or from the outer latch.
for.end19: ; preds = %for.cond1.for.inc17_crit_edge, %entry
ret void
}