Inner-loop-only reductions require additional checks to make sure they form a load-phi-store cycle across the inner and outer loops. Otherwise the reduction value is not properly preserved. This patch disables interchanging such loops for now, as it causes miscompiles in some cases and seems to apply to only a tiny number of loops. Across the test-suite, SPEC2000 and SPEC2006, 61 instead of 62 loops are interchanged with inner-loop reduction support disabled. With -loop-interchange-threshold=-1000, 3256 instead of 3267. See the discussion and history of D53027 for an outline of what such legality checks could look like. Reviewers: efriedma, mcrosier, davide Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D53027 llvm-svn: 345877
95 lines
4.1 KiB
LLVM
95 lines
4.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -loop-interchange-threshold=-1000 -S 2>&1 | FileCheck %s
|
|
;; Checks that the order of the inner phi nodes does not cause havoc.
|
|
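;; The inner loop has a reduction into c. The IV is not the first phi.
;; NOTE(review): the IR in this file has no reduction phi -- for3's only phi is
;; the IV %k, and its body updates %A in place -- so the line above looks stale.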
|
|
|
|
; Module-level target description: the data layout string and the target
; triple for this test module.
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
|
|
target triple = "armv8--linux-gnueabihf"
|
|
|
|
|
|
|
|
; Function Attrs: norecurse nounwind
|
|
; @test: a three-deep loop nest (for1 over %i, for2 over %j, for3 over %k) used
; as input for the loop-interchange pass.
;   - for1 and for2 count from 0 and exit when the incremented IV equals 90;
;     for3 counts from 1 and exits when %inc equals 90.
;   - The for3 body increments the i16 element at %A[%k + %i * 90] in place.
;   - %T, %C and %B are not referenced in the body.
; The autogenerated assertions below expect for3 to become the outermost loop
; (entry branches to for3.preheader), with for1 and for2 nested inside it and
; the load/add/store body placed in for3.split1.
; NOTE(review): %A is declared readonly, yet the store in for3 writes through
; a pointer derived from it -- confirm whether the attribute is intentional.
define void @test(i32 %T, [90 x i32]* noalias nocapture %C, i16* noalias nocapture readonly %A, i16* noalias nocapture readonly %B) local_unnamed_addr #0 {
|
|
; CHECK-LABEL: @test(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[FOR3_PREHEADER:%.*]]
|
|
; CHECK: for1.header.preheader:
|
|
; CHECK-NEXT: br label [[FOR1_HEADER:%.*]]
|
|
; CHECK: for1.header:
|
|
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INC20:%.*]], [[FOR1_INC19:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
|
|
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I]], 90
|
|
; CHECK-NEXT: br label [[FOR2_HEADER_PREHEADER:%.*]]
|
|
; CHECK: for2.header.preheader:
|
|
; CHECK-NEXT: br label [[FOR2_HEADER:%.*]]
|
|
; CHECK: for2.header:
|
|
; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[INC17:%.*]], [[FOR2_INC16:%.*]] ], [ 0, [[FOR2_HEADER_PREHEADER]] ]
|
|
; CHECK-NEXT: br label [[FOR3_SPLIT1:%.*]]
|
|
; CHECK: for3.preheader:
|
|
; CHECK-NEXT: br label [[FOR3:%.*]]
|
|
; CHECK: for3:
|
|
; CHECK-NEXT: [[K:%.*]] = phi i32 [ [[INC:%.*]], [[FOR3_SPLIT:%.*]] ], [ 1, [[FOR3_PREHEADER]] ]
|
|
; CHECK-NEXT: br label [[FOR1_HEADER_PREHEADER]]
|
|
; CHECK: for3.split1:
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[K]], [[MUL]]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i32 [[ADD]]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2
|
|
; CHECK-NEXT: [[ADD15:%.*]] = add nsw i16 [[TMP0]], 1
|
|
; CHECK-NEXT: store i16 [[ADD15]], i16* [[ARRAYIDX]]
|
|
; CHECK-NEXT: br label [[FOR2_INC16]]
|
|
; CHECK: for3.split:
|
|
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 90
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR3]]
|
|
; CHECK: for2.inc16:
|
|
; CHECK-NEXT: [[INC17]] = add nuw nsw i32 [[J]], 1
|
|
; CHECK-NEXT: [[EXITCOND47:%.*]] = icmp eq i32 [[INC17]], 90
|
|
; CHECK-NEXT: br i1 [[EXITCOND47]], label [[FOR1_INC19]], label [[FOR2_HEADER]]
|
|
; CHECK: for1.inc19:
|
|
; CHECK-NEXT: [[INC20]] = add nuw nsw i32 [[I]], 1
|
|
; CHECK-NEXT: [[EXITCOND48:%.*]] = icmp eq i32 [[INC20]], 90
|
|
; CHECK-NEXT: br i1 [[EXITCOND48]], label [[FOR3_SPLIT]], label [[FOR1_HEADER]]
|
|
; CHECK: for1.loopexit:
|
|
; CHECK-NEXT: br label [[EXIT:%.*]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for1.header
|
|
|
|
; for1 header (outermost loop in the input): %i is the IV, and %mul = %i * 90
; feeds the index computed in for3.
for1.header: ; preds = %for1.inc19, %entry
|
|
%i = phi i32 [ %inc20, %for1.inc19 ], [ 0, %entry ]
|
|
%mul = mul nsw i32 %i, 90
|
|
br label %for2.header
|
|
|
|
; for2 header (middle loop): %j is the IV; it is only used by its own
; increment in for2.inc16.
for2.header: ; preds = %for2.inc16, %for1.header
|
|
%j = phi i32 [ 0, %for1.header ], [ %inc17, %for2.inc16 ]
|
|
br label %for3
|
|
|
|
; for3 (innermost loop; header and latch are the same block): %k starts at 1.
; The body loads the i16 at %A[%k + %mul], adds 1 and stores it back.
for3: ; preds = %for3, %for2.header
|
|
%k = phi i32 [ 1, %for2.header ], [ %inc, %for3 ]
|
|
%add = add nsw i32 %k, %mul
|
|
%arrayidx = getelementptr inbounds i16, i16* %A, i32 %add
|
|
%0 = load i16, i16* %arrayidx, align 2
|
|
%add15 = add nsw i16 %0, 1
|
|
store i16 %add15, i16* %arrayidx
|
|
%inc = add nuw nsw i32 %k, 1
|
|
%exitcond = icmp eq i32 %inc, 90
|
|
br i1 %exitcond, label %for2.inc16, label %for3
|
|
|
|
; for2 latch: leaves for2 once %inc17 equals 90.
for2.inc16: ; preds = %for3
|
|
%inc17 = add nuw nsw i32 %j, 1
|
|
%exitcond47 = icmp eq i32 %inc17, 90
|
|
br i1 %exitcond47, label %for1.inc19, label %for2.header
|
|
|
|
; for1 latch: leaves the whole nest once %inc20 equals 90.
for1.inc19: ; preds = %for2.inc16
|
|
%inc20 = add nuw nsw i32 %i, 1
|
|
%exitcond48 = icmp eq i32 %inc20, 90
|
|
br i1 %exitcond48, label %for1.loopexit, label %for1.header
|
|
|
|
for1.loopexit: ; preds = %for1.inc19
|
|
br label %exit
|
|
|
|
exit: ; preds = %for1.loopexit
|
|
ret void
|
|
}
|