Files
clang-p2996/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll
David Green bb2d23dcd4 [ARM] Improve detection of fallthrough when aligning blocks
We align non-fallthrough branches under Cortex-M at O3 to lead to fewer
instruction fetches. This improves that for the block after a LE or
LETP. These blocks will still have terminating branches until the
LowOverheadLoops pass is run (as they are not handled by analyzeBranch,
the branch is not removed until later), so canFallThrough will return
false. These extra branches will eventually be removed, leaving a
fallthrough, so treat them as such and don't add unnecessary alignments.

Differential Revision: https://reviews.llvm.org/D107810
2021-09-27 11:21:21 +01:00

89 lines
3.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1-m.main-none-eabi -mcpu=cortex-m55 -O3 < %s | FileCheck %s
; Test function: sums 500 consecutive i32 values starting at %x and returns
; the total.
; The expected output below keeps the trip count in lr (mov.w lr, #500) and
; closes the loop with `le lr, .LBB0_1`. Only the loop header .LBB0_1 is
; preceded by .p2align 2; the block after the `le` (%for.cond.cleanup) is
; expected to follow immediately, with no alignment directive in between.
define i32 @loop(i32* nocapture readonly %x) {
; CHECK-LABEL: loop:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: mov.w lr, #500
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: .LBB0_1: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr r2, [r0], #4
; CHECK-NEXT: add r1, r2
; CHECK-NEXT: le lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: pop {r7, pc}
entry:
br label %for.body
; Exit block: returns the final running sum.
for.cond.cleanup: ; preds = %for.body
ret i32 %add
; Loop body: %i.07 is the element index and %s.06 the running sum; both
; start at 0 on entry.
for.body: ; preds = %entry, %for.body
%i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%s.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
; Load %x[%i.07] and add it to the running sum.
%arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.07
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %s.06
%inc = add nuw nsw i32 %i.07, 1
; Leave the loop once the incremented index reaches the constant 500.
%exitcond.not = icmp eq i32 %inc, 500
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
; Test function: when %n > 0, returns the i64 sum over i in [0, %n) of
; sext(%x[i]) * sext(%y); otherwise returns 0.
; In the expected output below, .p2align 2 appears before the loop header
; .LBB1_2 and before .LBB1_4 (which follows a `pop {r7, pc}` and is the target
; of the entry `blt`), but not before the block that follows
; `le lr, .LBB1_2` (%for.cond.cleanup).
define i64 @loopif(i32* nocapture readonly %x, i32 %y, i32 %n) {
; CHECK-LABEL: loopif:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: blt .LBB1_4
; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph
; CHECK-NEXT: mov lr, r2
; CHECK-NEXT: dls lr, r2
; CHECK-NEXT: mov r12, r0
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: .LBB1_2: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr r2, [r12], #4
; CHECK-NEXT: smlal r0, r3, r2, r1
; CHECK-NEXT: le lr, .LBB1_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: pop {r7, pc}
; Guard: skip the loop entirely unless %n is strictly positive.
entry:
%cmp7 = icmp sgt i32 %n, 0
br i1 %cmp7, label %for.body.lr.ph, label %for.cond.cleanup
; Preheader: widen the loop-invariant multiplier %y to i64 once.
for.body.lr.ph: ; preds = %entry
%conv1 = sext i32 %y to i64
br label %for.body
; Exit block: the result is 0 when the loop was skipped, otherwise the final
; accumulated sum.
for.cond.cleanup: ; preds = %for.body, %entry
%s.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ]
ret i64 %s.0.lcssa
; Loop body: %i.09 is the element index and %s.08 the i64 running sum; both
; start at 0 when entered from the preheader.
for.body: ; preds = %for.body.lr.ph, %for.body
%i.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
%s.08 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
; Load %x[%i.09], sign-extend it, multiply by the widened %y and accumulate.
%arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.09
%0 = load i32, i32* %arrayidx, align 4
%conv = sext i32 %0 to i64
%mul = mul nsw i64 %conv, %conv1
%add = add nsw i64 %mul, %s.08
%inc = add nuw nsw i32 %i.09, 1
; Leave the loop once the incremented index reaches %n.
%exitcond.not = icmp eq i32 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}