Summary:
Two utility functions have essentially the same functionality. This patch is an attempt to merge them into one.
1. lib/Transforms/Utils/Local.cpp : MergeBasicBlockIntoOnlyPred
2. lib/Transforms/Utils/BasicBlockUtils.cpp : MergeBlockIntoPredecessor
Prior to the patch:
1. MergeBasicBlockIntoOnlyPred
Updates either DomTree or DeferredDominance
Moves all instructions from Pred to BB, deletes Pred
Asserts BB has single predecessor
If the address was taken, replaces the block address with the constant 1 (?)
2. MergeBlockIntoPredecessor
Updates DomTree, LoopInfo and MemoryDependenceResults
Moves all instructions from BB to Pred, deletes BB
Returns if BB doesn't have a single predecessor
Returns if BB's address was taken
After the patch:
Method 2. MergeBlockIntoPredecessor is attempting to become the new default:
Updates DomTree or DeferredDominance, and LoopInfo and MemoryDependenceResults
Moves all instructions from BB to Pred, deletes BB
Returns if BB doesn't have a single predecessor
Returns if BB's address was taken
Uses of MergeBasicBlockIntoOnlyPred that need to be replaced:
1. lib/Transforms/Scalar/LoopSimplifyCFG.cpp
Updated in this patch. No challenges.
2. lib/CodeGen/CodeGenPrepare.cpp
Updated in this patch.
i. eliminateFallThrough is straightforward, but I added a temporary array to avoid iterator invalidation.
ii. eliminateMostlyEmptyBlock(s) methods also now use a temporary array for blocks
Some interesting aspects:
- Since Pred is not deleted (BB is), the entry block does not need updating.
- The entry block was being updated with the deleted block in eliminateMostlyEmptyBlock. Added an assert to make it obvious that BB=SinglePred.
- isMergingEmptyBlockProfitable assumes BB is the one to be deleted.
- eliminateMostlyEmptyBlock(BB) does not delete BB on one path, it deletes its unique predecessor instead.
- Adding some test owners as subscribers for the interesting tests that were modified:
test/CodeGen/X86/avx-cmp.ll
test/CodeGen/AMDGPU/nested-loop-conditions.ll
test/CodeGen/AMDGPU/si-annotate-cf.ll
test/CodeGen/X86/hoist-spill.ll
test/CodeGen/X86/2006-11-17-IllegalMove.ll
3. lib/Transforms/Scalar/JumpThreading.cpp
Not covered in this patch. It is the only use case using the DeferredDominance.
I would defer to Brian Rzycki to make this replacement.
Reviewers: chandlerc, spatel, davide, brzycki, bkramer, javed.absar
Subscribers: qcolombet, sanjoy, nemanjai, nhaehnle, jlebar, tpr, kbarton, RKSimon, wmi, arsenm, llvm-commits
Differential Revision: https://reviews.llvm.org/D48202
llvm-svn: 335183
98 lines
3.4 KiB
LLVM
98 lines
3.4 KiB
LLVM
; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a8 %s -o - -arm-atomic-cfg-tidy=0 | FileCheck %s
|
|
;
|
|
; LSR should only check for valid address modes when the IV user is a
|
|
; memory address.
|
|
; svn r158536, rdar://11635990
|
|
;
|
|
; Note that we still don't produce the best code here because we fail
|
|
; to coalesce the IV. See <rdar://problem/11680670> [coalescer] IVs
|
|
; need to be scheduled to expose coalescing.
|
|
|
|
; LSR before the fix:
|
|
;The chosen solution requires 4 regs, with addrec cost 1, plus 3 base adds, plus 2 setup cost:
|
|
; LSR Use: Kind=Special, Offsets={0}, all-fixups-outside-loop, widest fixup type: i32
|
|
; reg(%v3) + reg({0,+,-1}<%while.cond.i.i>) + imm(1)
|
|
; LSR Use: Kind=ICmpZero, Offsets={0}, widest fixup type: i32
|
|
; reg(%v3) + reg({0,+,-1}<%while.cond.i.i>)
|
|
; LSR Use: Kind=Address of i32, Offsets={0}, widest fixup type: i32*
|
|
; reg((-4 + (4 * %v3) + %v1)) + 4*reg({0,+,-1}<%while.cond.i.i>)
|
|
; LSR Use: Kind=Address of i32, Offsets={0}, widest fixup type: i32*
|
|
; reg((-4 + (4 * %v3) + %v4)) + 4*reg({0,+,-1}<%while.cond.i.i>)
|
|
; LSR Use: Kind=Special, Offsets={0}, all-fixups-outside-loop, widest fixup type: i32
|
|
; reg(%v3)
|
|
;
|
|
; LSR after the fix:
|
|
;The chosen solution requires 4 regs, with addrec cost 1, plus 1 base add, plus 2 setup cost:
|
|
; LSR Use: Kind=Special, Offsets={0}, all-fixups-outside-loop, widest fixup type: i32
|
|
; reg({%v3,+,-1}<nsw><%while.cond.i.i>) + imm(1)
|
|
; LSR Use: Kind=ICmpZero, Offsets={0}, widest fixup type: i32
|
|
; reg({%v3,+,-1}<nsw><%while.cond.i.i>)
|
|
; LSR Use: Kind=Address of i32, Offsets={0}, widest fixup type: i32*
|
|
; reg((-4 + %v1)) + 4*reg({%v3,+,-1}<nsw><%while.cond.i.i>)
|
|
; LSR Use: Kind=Address of i32, Offsets={0}, widest fixup type: i32*
|
|
; reg((-4 + %v4)) + 4*reg({%v3,+,-1}<nsw><%while.cond.i.i>)
|
|
; LSR Use: Kind=Special, Offsets={0}, all-fixups-outside-loop, widest fixup type: i32
|
|
; reg(%v3)
|
|
|
|
|
|
; %s is a struct type with a single field of type i32*.
%s = type { i32* }
|
|
|
|
; External i32 global; @main loads it twice (as %v0 and as %v3).
@ncol = external global i32, align 4
|
|
|
|
; External helpers called from @main: @getptr returns an i32* and
; @getstruct returns a pointer to %s. Both take no arguments.
declare i32* @getptr() nounwind
|
|
declare %s* @getstruct() nounwind
|
|
|
|
; CHECK: @main
|
|
; Check that the loop preheader contains no address computation.
|
|
; CHECK: %while.cond.i.i
|
|
; CHECK-NOT: add{{.*}}lsl
|
|
; CHECK: ldr{{.*}}lsl #2
|
|
; CHECK: ldr{{.*}}lsl #2
|
|
; @main: input function for the LSR codegen test described in the file header.
; Takes no arguments and returns an i32. A chain of single-successor blocks
; (entry -> while.cond.outer -> while.cond -> while.body -> end_of_chain) feeds
; the loop headed by %while.cond.i.i, which indexes two i32 arrays with the same
; decrementing counter. The FileCheck directives above the function expect two
; scaled (lsl #2) loads after that block's label with no add-with-shift before them.
define i32 @main() nounwind ssp {
|
|
entry:
|
|
%v0 = load i32, i32* @ncol, align 4
|
|
; %v1 is the base pointer of one of the two arrays read inside the loop.
%v1 = tail call i32* @getptr() nounwind
|
|
; %cmp10.i has no use anywhere in this function.
%cmp10.i = icmp eq i32 %v0, 0
|
|
br label %while.cond.outer
|
|
|
|
while.cond.outer:
|
|
; %call18 points to a %s struct; its only field is read in end_of_chain.
%call18 = tail call %s* @getstruct() nounwind
|
|
br label %while.cond
|
|
|
|
while.cond:
|
|
; %cmp20 has no use anywhere in this function; the branch below is unconditional.
%cmp20 = icmp eq i32* %v1, null
|
|
br label %while.body
|
|
|
|
while.body:
|
|
; %v3 is the loop counter's initial value and also the value returned
; from while.end.i.
%v3 = load i32, i32* @ncol, align 4
|
|
br label %end_of_chain
|
|
|
|
; end_of_chain is the only predecessor of %while.cond.i.i outside the loop.
end_of_chain:
|
|
; Address of field 0 of the struct returned by @getstruct.
%state.i = getelementptr inbounds %s, %s* %call18, i32 0, i32 0
|
|
; %v4 is the base pointer of the other array read inside the loop.
%v4 = load i32*, i32** %state.i, align 4
|
|
br label %while.cond.i.i
|
|
|
|
; Loop header: the counter starts at %v3 and is decremented by one on each
; trip around the back edge from %land.rhs.i.i.
while.cond.i.i:
|
|
%counter.0.i.i = phi i32 [ %v3, %end_of_chain ], [ %dec.i.i, %land.rhs.i.i ]
|
|
%dec.i.i = add nsw i32 %counter.0.i.i, -1
|
|
; The exit test uses the pre-decrement counter value.
%tobool.i.i = icmp eq i32 %counter.0.i.i, 0
|
|
br i1 %tobool.i.i, label %where.exit, label %land.rhs.i.i
|
|
|
|
; Loop body: load element %dec.i.i from both arrays (%v4 and %v1) and keep
; looping only while the two elements are equal.
land.rhs.i.i:
|
|
%arrayidx.i.i = getelementptr inbounds i32, i32* %v4, i32 %dec.i.i
|
|
%v5 = load i32, i32* %arrayidx.i.i, align 4
|
|
%arrayidx1.i.i = getelementptr inbounds i32, i32* %v1, i32 %dec.i.i
|
|
%v6 = load i32, i32* %arrayidx1.i.i, align 4
|
|
%cmp.i.i = icmp eq i32 %v5, %v6
|
|
br i1 %cmp.i.i, label %while.cond.i.i, label %equal_data.exit.i
|
|
|
|
; Exit taken when the two elements differ: returns the counter value of the
; iteration that found the mismatch (an out-of-loop use of the counter).
equal_data.exit.i:
|
|
ret i32 %counter.0.i.i
|
|
|
|
; Exit taken when the counter reached zero.
where.exit:
|
|
br label %while.end.i
|
|
|
|
while.end.i:
|
|
; Returns the initial counter value loaded in while.body.
ret i32 %v3
|
|
}
|