Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1
if this would make the inner GEP loop-invariant and thus hoistable.
This is intended to replace an InstCombine fold that does this (see
llvm/lib/Transforms/InstCombine/InstructionCombining.cpp, line 2006, at commit 04f61fb73d).
The problem with the InstCombine fold is that LoopInfo is an optional
dependency, so it is not performed reliably.
Differential Revision: https://reviews.llvm.org/D146813
109 lines
3.8 KiB
LLVM
109 lines
3.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; Two-level loop nest over %PtrA and %PtrB (%LenB is never read).
;   Outer loop: header %block3, back edge from %block8. %inc is the outer
;               counter plus one and is recomputed once per outer iteration.
;   Inner loop: header %block4, back edge from %if.then4. The inner index
;               (%InnerInd.1 / %inc1) is carried across outer iterations
;               instead of being reset.
;   Exit:       %if.then, reached from %block4 once %inc >= %LenA (signed).
; The autogenerated assertions below pin the exact pwr8 output, including the
; .p2align 5 emitted ahead of .LBB0_2 (the inner-loop block) and the
; .p2align 4 emitted ahead of .LBB0_4 (%if.end9).
define signext i32 @test(ptr noalias %PtrA, ptr noalias %PtrB, i32 signext %LenA, i32 signext %LenB) #0 {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li 6, 0
; CHECK-NEXT: addi 7, 3, 4
; CHECK-NEXT: addi 4, 4, -4
; CHECK-NEXT: li 8, 0
; CHECK-NEXT: .LBB0_1: # %block3
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_2 Depth 2
; CHECK-NEXT: mr 9, 6
; CHECK-NEXT: addi 6, 6, 1
; CHECK-NEXT: extsw 8, 8
; CHECK-NEXT: cmpw 6, 5
; CHECK-NEXT: extsw 9, 9
; CHECK-NEXT: crnot 20, 0
; CHECK-NEXT: sldi 10, 8, 2
; CHECK-NEXT: sldi 9, 9, 2
; CHECK-NEXT: addi 8, 8, 1
; CHECK-NEXT: add 10, 4, 10
; CHECK-NEXT: bc 12, 20, .LBB0_5
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_2: # %if.end
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwz 11, 4(10)
; CHECK-NEXT: cmplwi 11, 0
; CHECK-NEXT: addi 11, 10, 4
; CHECK-NEXT: beq 0, .LBB0_4
; CHECK-NEXT: # %bb.3: # %if.then4
; CHECK-NEXT: #
; CHECK-NEXT: lwzx 12, 7, 9
; CHECK-NEXT: addi 8, 8, 1
; CHECK-NEXT: stw 12, 8(10)
; CHECK-NEXT: mr 10, 11
; CHECK-NEXT: bc 4, 20, .LBB0_2
; CHECK-NEXT: b .LBB0_5
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_4: # %if.end9
; CHECK-NEXT: #
; CHECK-NEXT: lwzx 10, 7, 9
; CHECK-NEXT: addi 10, 10, 1
; CHECK-NEXT: stwx 10, 7, 9
; CHECK-NEXT: b .LBB0_1
; CHECK-NEXT: .LBB0_5: # %if.then
; CHECK-NEXT: lwax 3, 9, 3
; CHECK-NEXT: blr
entry:
  br label %block2

block2: ; preds = %entry
  br label %block3

; Outer-loop header: both induction values start at 0 on entry from %block2
; and are fed back from %block8.
block3: ; preds = %block8, %block2
  %OuterInd.0 = phi i32 [ 0, %block2 ], [ %inc, %block8 ]
  %InnerInd.0 = phi i32 [ 0, %block2 ], [ %inc1, %block8 ]
  %inc = add nsw i32 %OuterInd.0, 1
  br label %block4

; Inner-loop header. Both operands of the exit compare (%inc, %LenA) are
; defined outside the inner loop.
block4: ; preds = %if.then4, %block3
  %InnerInd.1 = phi i32 [ %InnerInd.0, %block3 ], [ %inc1, %if.then4 ]
  %cmp = icmp sge i32 %inc, %LenA
  br i1 %cmp, label %if.then, label %if.end

; Function exit: returns PtrA[%inc - 1].
if.then: ; preds = %block4
  %sub = sub nsw i32 %inc, 1
  %idxprom = sext i32 %sub to i64
  %arrayidx = getelementptr inbounds i32, ptr %PtrA, i64 %idxprom
  %0 = load i32, ptr %arrayidx, align 4
  ret i32 %0

if.end: ; preds = %block4
  br label %block5

; Advance the inner index, then test PtrB[old index] against zero.
block5: ; preds = %if.end
  %inc1 = add nsw i32 %InnerInd.1, 1
  %idxprom2 = sext i32 %InnerInd.1 to i64
  %arrayidx3 = getelementptr inbounds i32, ptr %PtrB, i64 %idxprom2
  %1 = load i32, ptr %arrayidx3, align 4
  %tobool = icmp ne i32 %1, 0
  br i1 %tobool, label %if.then4, label %if.end9

; Non-zero element: copy PtrA[%inc] into PtrB[%inc1] and stay in the inner loop.
if.then4: ; preds = %block5
  %idxprom5 = sext i32 %inc to i64
  %arrayidx6 = getelementptr inbounds i32, ptr %PtrA, i64 %idxprom5
  %2 = load i32, ptr %arrayidx6, align 4
  %idxprom7 = sext i32 %inc1 to i64
  %arrayidx8 = getelementptr inbounds i32, ptr %PtrB, i64 %idxprom7
  store i32 %2, ptr %arrayidx8, align 4
  br label %block4

if.end9: ; preds = %block5
  br label %block6

; Zero element: increment PtrA[%inc] in memory, then leave the inner loop for
; the next outer iteration.
block6: ; preds = %if.end9
  %idxprom10 = sext i32 %inc to i64
  %arrayidx11 = getelementptr inbounds i32, ptr %PtrA, i64 %idxprom10
  %3 = load i32, ptr %arrayidx11, align 4
  %inc12 = add nsw i32 %3, 1
  store i32 %inc12, ptr %arrayidx11, align 4
  br label %block8

block8: ; preds = %block6
  br label %block3
}