Under Cortex-M at -O3 we align the targets of non-fallthrough branches to reduce instruction fetches. This patch extends that handling to the block following an LE or LETP. Such blocks still have terminating branches until the LowOverheadLoops pass is run (as they are not handled by analyzeBranch, the branch is not removed until later), so canFallThrough will return false for them. These extra branches will eventually be removed, leaving a fallthrough, so treat the blocks as fallthrough and don't add unnecessary alignments. Differential Revision: https://reviews.llvm.org/D107810
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1-m.main-none-eabi -mcpu=cortex-m55 -O3 < %s | FileCheck %s
; Simple 500-iteration i32 sum reduction, lowered to a low-overhead loop (le).
; The block after the le (%bb.2) must NOT receive an extra .p2align: until the
; LowOverheadLoops pass runs it still carries a terminating branch, but that
; branch is later removed and the block becomes a fallthrough.
define i32 @loop(i32* nocapture readonly %x) {
; CHECK-LABEL: loop:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    mov.w lr, #500
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    .p2align 2
; CHECK-NEXT:  .LBB0_1: @ %for.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldr r2, [r0], #4
; CHECK-NEXT:    add r1, r2
; CHECK-NEXT:    le lr, .LBB0_1
; CHECK-NEXT:  @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    pop {r7, pc}
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret i32 %add

for.body:                                         ; preds = %entry, %for.body
  %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %s.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.07
  %0 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %0, %s.06
  %inc = add nuw nsw i32 %i.07, 1
  %exitcond.not = icmp eq i32 %inc, 500
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
; Guarded multiply-accumulate loop (n may be <= 0), lowered with dls/le.
; As above, %bb.3 after the le gets no alignment. In contrast, .LBB1_4 is
; reached only by the blt branch and follows a non-fallthrough pop, so it
; keeps its .p2align 2.
define i64 @loopif(i32* nocapture readonly %x, i32 %y, i32 %n) {
; CHECK-LABEL: loopif:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    blt .LBB1_4
; CHECK-NEXT:  @ %bb.1: @ %for.body.lr.ph
; CHECK-NEXT:    mov lr, r2
; CHECK-NEXT:    dls lr, r2
; CHECK-NEXT:    mov r12, r0
; CHECK-NEXT:    movs r0, #0
; CHECK-NEXT:    movs r3, #0
; CHECK-NEXT:    .p2align 2
; CHECK-NEXT:  .LBB1_2: @ %for.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldr r2, [r12], #4
; CHECK-NEXT:    smlal r0, r3, r2, r1
; CHECK-NEXT:    le lr, .LBB1_2
; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    pop {r7, pc}
; CHECK-NEXT:    .p2align 2
; CHECK-NEXT:  .LBB1_4:
; CHECK-NEXT:    movs r0, #0
; CHECK-NEXT:    movs r3, #0
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    pop {r7, pc}
entry:
  %cmp7 = icmp sgt i32 %n, 0
  br i1 %cmp7, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph:                                   ; preds = %entry
  %conv1 = sext i32 %y to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  %s.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ]
  ret i64 %s.0.lcssa

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
  %s.08 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.09
  %0 = load i32, i32* %arrayidx, align 4
  %conv = sext i32 %0 to i64
  %mul = mul nsw i64 %conv, %conv1
  %add = add nsw i64 %mul, %s.08
  %inc = add nuw nsw i32 %i.09, 1
  %exitcond.not = icmp eq i32 %inc, %n
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}