Motivated by pr43326 (https://bugs.llvm.org/show_bug.cgi?id=43326), where a slightly modified case is as follows. void f(int e[10][10][10], int f[10][10][10]) { for (int a = 0; a < 10; a++) for (int b = 0; b < 10; b++) for (int c = 0; c < 10; c++) f[c][b][a] = e[c][b][a]; } The ideal optimal access pattern after running interchange is supposed to be the following void f(int e[10][10][10], int f[10][10][10]) { for (int c = 0; c < 10; c++) for (int b = 0; b < 10; b++) for (int a = 0; a < 10; a++) f[c][b][a] = e[c][b][a]; } Currently loop interchange is limited to picking up the innermost loop and finding an order that is locally optimal for it. However, the pass failed to produce the globally optimal loop access order. For more complex examples what we get could be quite far from the globally optimal ordering. What is proposed in this patch is to do a "bubble-sort" fashion when doing interchange. By comparing neighbors in `LoopList` in each iteration, we would be able to move each loop onto a most appropriate place, hence this is an approach that tries to achieve the globally optimal ordering. The motivating example above is added as a test case. Reviewed By: Meinersbur Differential Revision: https://reviews.llvm.org/D120386
97 lines
4.3 KiB
LLVM
97 lines
4.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -loop-interchange-threshold=0 -S 2>&1 | FileCheck %s
|
|
;; Checks the order of the inner phi nodes does not cause havoc.
|
|
;; The inner loop has a reduction into c. The IV is not the first phi.
|
|
|
|
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
|
|
target triple = "armv8--linux-gnueabihf"
|
|
|
|
|
|
|
|
; Function Attrs: norecurse nounwind
|
|
define void @test(i32 %T, [90 x i32]* noalias nocapture %C, [90 x [90 x i16]]* noalias nocapture readonly %A, i16* noalias nocapture readonly %B) local_unnamed_addr #0 {
|
|
; CHECK-LABEL: @test(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[FOR3_PREHEADER:%.*]]
|
|
; CHECK: for1.header.preheader:
|
|
; CHECK-NEXT: br label [[FOR1_HEADER:%.*]]
|
|
; CHECK: for1.header:
|
|
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INC20:%.*]], [[FOR1_INC19:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
|
|
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I]], 90
|
|
; CHECK-NEXT: br label [[FOR2_HEADER_PREHEADER:%.*]]
|
|
; CHECK: for2.header.preheader:
|
|
; CHECK-NEXT: br label [[FOR2_HEADER:%.*]]
|
|
; CHECK: for2.header:
|
|
; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[INC17:%.*]], [[FOR2_INC16:%.*]] ], [ 0, [[FOR2_HEADER_PREHEADER]] ]
|
|
; CHECK-NEXT: br label [[FOR3_SPLIT1:%.*]]
|
|
; CHECK: for3.preheader:
|
|
; CHECK-NEXT: br label [[FOR3:%.*]]
|
|
; CHECK: for3:
|
|
; CHECK-NEXT: [[K:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR3_SPLIT:%.*]] ], [ 1, [[FOR3_PREHEADER]] ]
|
|
; CHECK-NEXT: br label [[FOR1_HEADER_PREHEADER]]
|
|
; CHECK: for3.split1:
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[K]], [[MUL]]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [90 x [90 x i16]], [90 x [90 x i16]]* [[A:%.*]], i32 [[ADD]], i32 [[J]], i32 [[I]]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2
|
|
; CHECK-NEXT: [[ADD15:%.*]] = add nsw i16 [[TMP0]], 1
|
|
; CHECK-NEXT: store i16 [[ADD15]], i16* [[ARRAYIDX]]
|
|
; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i32 [[K]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 90
|
|
; CHECK-NEXT: br label [[FOR2_INC16]]
|
|
; CHECK: for3.split:
|
|
; CHECK-NEXT: [[TMP1]] = add nuw nsw i32 [[K]], 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 90
|
|
; CHECK-NEXT: br i1 [[TMP2]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR3]]
|
|
; CHECK: for2.inc16:
|
|
; CHECK-NEXT: [[INC17]] = add nuw nsw i32 [[J]], 1
|
|
; CHECK-NEXT: [[EXITCOND47:%.*]] = icmp eq i32 [[INC17]], 90
|
|
; CHECK-NEXT: br i1 [[EXITCOND47]], label [[FOR1_INC19]], label [[FOR2_HEADER]]
|
|
; CHECK: for1.inc19:
|
|
; CHECK-NEXT: [[INC20]] = add nuw nsw i32 [[I]], 1
|
|
; CHECK-NEXT: [[EXITCOND48:%.*]] = icmp eq i32 [[INC20]], 90
|
|
; CHECK-NEXT: br i1 [[EXITCOND48]], label [[FOR3_SPLIT]], label [[FOR1_HEADER]]
|
|
; CHECK: for1.loopexit:
|
|
; CHECK-NEXT: br label [[EXIT:%.*]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for1.header
|
|
|
|
for1.header: ; preds = %entry
|
|
%i = phi i32 [ %inc20, %for1.inc19 ], [ 0, %entry ]
|
|
%mul = mul nsw i32 %i, 90
|
|
br label %for2.header
|
|
|
|
for2.header: ; preds = %for2.inc16, %for1.header
|
|
%j = phi i32 [ 0, %for1.header ], [ %inc17, %for2.inc16 ]
|
|
br label %for3
|
|
|
|
for3: ; preds = %for3, %for2.header
|
|
%k = phi i32 [ 1, %for2.header ], [ %inc, %for3 ]
|
|
%add = add nsw i32 %k, %mul
|
|
%arrayidx = getelementptr inbounds [90 x [90 x i16]], [90 x [90 x i16]]* %A, i32 %add, i32 %j, i32 %i
|
|
%0 = load i16, i16* %arrayidx, align 2
|
|
%add15 = add nsw i16 %0, 1
|
|
store i16 %add15, i16* %arrayidx
|
|
%inc = add nuw nsw i32 %k, 1
|
|
%exitcond = icmp eq i32 %inc, 90
|
|
br i1 %exitcond, label %for2.inc16, label %for3
|
|
|
|
for2.inc16: ; preds = %for.body6
|
|
%inc17 = add nuw nsw i32 %j, 1
|
|
%exitcond47 = icmp eq i32 %inc17, 90
|
|
br i1 %exitcond47, label %for1.inc19, label %for2.header
|
|
|
|
for1.inc19: ; preds = %for2.inc16
|
|
%inc20 = add nuw nsw i32 %i, 1
|
|
%exitcond48 = icmp eq i32 %inc20, 90
|
|
br i1 %exitcond48, label %for1.loopexit, label %for1.header
|
|
|
|
for1.loopexit: ; preds = %for1.inc19
|
|
br label %exit
|
|
|
|
exit: ; preds = %for1.loopexit
|
|
ret void
|
|
}
|