Files
clang-p2996/llvm/test/CodeGen/PowerPC/pr47373.ll
Chen Zheng eb7d16ea25 [PowerPC] make expensive mflr be away from its user in the function prologue
mflr is fairly expensive on Power versions earlier than Power10, so we should
schedule the store of the value defined by mflr away from the mflr itself.

In the epilogue, the value defined by the expensive mtlr has no user, so it
doesn't matter that the load and the mtlr are back-to-back.

Reviewed By: RolandF

Differential Revision: https://reviews.llvm.org/D137423
2022-11-14 21:14:20 -05:00

181 lines
6.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=powerpc64-unknown-freebsd13.0 -verify-machineinstrs \
; RUN: -mcpu=ppc64 -ppc-asm-full-reg-names < %s | FileCheck %s
; Global holding a pointer to float, initialized to null. @d loads this pointer
; once in its entry block and then reads and writes floats through it.
@a = local_unnamed_addr global float* null, align 8
; Test function for this codegen regression test.
;
; @d loads the float pointer stored in @a (call it p), calls @c to obtain a
; signed 32-bit element count and @b to obtain a second float pointer (call it
; q), and then computes p[i] = p[i] / q[i] for every i in [0, count). The IR is
; already vectorized by 4, with a runtime overlap check between the two arrays
; and a scalar loop that handles the remainder (or the whole range when the
; vector path is not taken).
;
; What the expected assembly directly below pins down:
;  - Prologue: the `std r0, 224(r1)` that saves the link register value is
;    separated from the `mflr r0` that produces it by the stdu and addis
;    instructions.
;  - Epilogue: `ld r0, 16(r1)` and `mtlr r0` are emitted back-to-back.
;  - vector.body: each <4 x float> load is emitted as an lvx/lvx/lvsl/vperm
;    sequence, and the vector fdiv is carried out as four scalar fdivs on
;    values written to and reloaded from stack slots.
;
; The assertion block is autogenerated; regenerate it with the script named in
; the NOTE at the top of the file rather than editing it by hand.
; Function Attrs: nounwind
define void @d() local_unnamed_addr #0 {
; CHECK-LABEL: d:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -208(r1)
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-NEXT: std r0, 224(r1)
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-NEXT: std r29, 184(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r30, 192(r1) # 8-byte Folded Spill
; CHECK-NEXT: ld r29, 0(r3)
; CHECK-NEXT: bl c
; CHECK-NEXT: nop
; CHECK-NEXT: mr r30, r3
; CHECK-NEXT: bl b
; CHECK-NEXT: nop
; CHECK-NEXT: cmpwi r30, 0
; CHECK-NEXT: ble cr0, .LBB0_9
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: cmplwi r30, 4
; CHECK-NEXT: clrldi r4, r30, 32
; CHECK-NEXT: li r5, 0
; CHECK-NEXT: blt cr0, .LBB0_7
; CHECK-NEXT: # %bb.2: # %vector.memcheck
; CHECK-NEXT: rldic r6, r30, 2, 30
; CHECK-NEXT: add r7, r3, r6
; CHECK-NEXT: cmpld r29, r7
; CHECK-NEXT: add r6, r29, r6
; CHECK-NEXT: bc 4, lt, .LBB0_4
; CHECK-NEXT: # %bb.3: # %vector.memcheck
; CHECK-NEXT: cmpld r3, r6
; CHECK-NEXT: bc 12, lt, .LBB0_7
; CHECK-NEXT: .LBB0_4: # %vector.ph
; CHECK-NEXT: rlwinm r5, r4, 0, 0, 29
; CHECK-NEXT: li r7, 15
; CHECK-NEXT: addi r6, r5, -4
; CHECK-NEXT: addi r8, r1, 144
; CHECK-NEXT: rldicl r6, r6, 62, 2
; CHECK-NEXT: addi r9, r1, 128
; CHECK-NEXT: addi r6, r6, 1
; CHECK-NEXT: addi r10, r1, 160
; CHECK-NEXT: mtctr r6
; CHECK-NEXT: li r6, 0
; CHECK-NEXT: addi r11, r1, 112
; CHECK-NEXT: .LBB0_5: # %vector.body
; CHECK-NEXT: #
; CHECK-NEXT: add r12, r3, r6
; CHECK-NEXT: lvx v3, r3, r6
; CHECK-NEXT: lvx v5, r12, r7
; CHECK-NEXT: add r12, r29, r6
; CHECK-NEXT: lvsl v2, r3, r6
; CHECK-NEXT: vperm v2, v3, v5, v2
; CHECK-NEXT: lvx v3, r29, r6
; CHECK-NEXT: lvx v5, r12, r7
; CHECK-NEXT: lvsl v4, r29, r6
; CHECK-NEXT: stvx v2, 0, r8
; CHECK-NEXT: vperm v2, v3, v5, v4
; CHECK-NEXT: stvx v2, 0, r9
; CHECK-NEXT: lfs f0, 156(r1)
; CHECK-NEXT: lfs f1, 140(r1)
; CHECK-NEXT: fdivs f0, f1, f0
; CHECK-NEXT: lfs f1, 136(r1)
; CHECK-NEXT: stfs f0, 172(r1)
; CHECK-NEXT: lfs f0, 152(r1)
; CHECK-NEXT: fdivs f0, f1, f0
; CHECK-NEXT: lfs f1, 132(r1)
; CHECK-NEXT: stfs f0, 168(r1)
; CHECK-NEXT: lfs f0, 148(r1)
; CHECK-NEXT: fdivs f0, f1, f0
; CHECK-NEXT: lfs f1, 128(r1)
; CHECK-NEXT: stfs f0, 164(r1)
; CHECK-NEXT: lfs f0, 144(r1)
; CHECK-NEXT: fdivs f0, f1, f0
; CHECK-NEXT: stfs f0, 160(r1)
; CHECK-NEXT: lvx v2, 0, r10
; CHECK-NEXT: stvx v2, 0, r11
; CHECK-NEXT: ld r0, 112(r1)
; CHECK-NEXT: stdx r0, r29, r6
; CHECK-NEXT: addi r6, r6, 16
; CHECK-NEXT: ld r0, 120(r1)
; CHECK-NEXT: std r0, 8(r12)
; CHECK-NEXT: bdnz .LBB0_5
; CHECK-NEXT: # %bb.6: # %middle.block
; CHECK-NEXT: cmpld r5, r4
; CHECK-NEXT: beq cr0, .LBB0_9
; CHECK-NEXT: .LBB0_7: # %for.body.preheader18
; CHECK-NEXT: sldi r6, r5, 2
; CHECK-NEXT: sub r5, r4, r5
; CHECK-NEXT: addi r6, r6, -4
; CHECK-NEXT: add r3, r3, r6
; CHECK-NEXT: add r4, r29, r6
; CHECK-NEXT: mtctr r5
; CHECK-NEXT: .LBB0_8: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: lfsu f0, 4(r4)
; CHECK-NEXT: lfsu f1, 4(r3)
; CHECK-NEXT: fdivs f0, f0, f1
; CHECK-NEXT: stfs f0, 0(r4)
; CHECK-NEXT: bdnz .LBB0_8
; CHECK-NEXT: .LBB0_9: # %for.end
; CHECK-NEXT: ld r30, 192(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, 184(r1) # 8-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 208
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
; Load p from @a before the calls, fetch the count (%call) and q (%call1), and
; skip all loops when the count is not strictly positive (signed compare).
entry:
%0 = load float*, float** @a, align 8
%call = call signext i32 bitcast (i32 (...)* @c to i32 ()*)() #2
%call1 = call float* bitcast (float* (...)* @b to float* ()*)() #2
%cmp11 = icmp sgt i32 %call, 0
br i1 %cmp11, label %for.body.preheader, label %for.end
; Widen the count to i64; counts below 4 (unsigned) go straight to the scalar
; loop because they cannot fill one 4-element vector iteration.
for.body.preheader: ; preds = %entry
%wide.trip.count = zext i32 %call to i64
%min.iters.check = icmp ult i32 %call, 4
br i1 %min.iters.check, label %for.body.preheader18, label %vector.memcheck
; Runtime overlap check: compute the one-past-the-end pointer of each array and
; fall back to the scalar loop when p < end(q) and q < end(p) both hold.
vector.memcheck: ; preds = %for.body.preheader
%scevgep = getelementptr float, float* %0, i64 %wide.trip.count
%scevgep15 = getelementptr float, float* %call1, i64 %wide.trip.count
%bound0 = icmp ult float* %0, %scevgep15
%bound1 = icmp ult float* %call1, %scevgep
%found.conflict = and i1 %bound0, %bound1
br i1 %found.conflict, label %for.body.preheader18, label %vector.ph
; %n.vec is the count with its two low bits cleared (mask 0xFFFFFFFC), i.e. the
; number of elements handled by the vector loop.
vector.ph: ; preds = %vector.memcheck
%n.vec = and i64 %wide.trip.count, 4294967292
br label %vector.body
; Vector loop: four elements per iteration, p[index..index+3] /= q[index..index+3].
; The <4 x float> loads and the store are annotated with only 4-byte alignment.
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; Load four floats from q.
%1 = getelementptr inbounds float, float* %call1, i64 %index
%2 = bitcast float* %1 to <4 x float>*
%wide.load = load <4 x float>, <4 x float>* %2, align 4
; Load four floats from p.
%3 = getelementptr inbounds float, float* %0, i64 %index
%4 = bitcast float* %3 to <4 x float>*
%wide.load17 = load <4 x float>, <4 x float>* %4, align 4
; Divide p's lanes by q's lanes (with fast-math flags) and store back into p.
%5 = fdiv reassoc nsz arcp afn <4 x float> %wide.load17, %wide.load
%6 = bitcast float* %3 to <4 x float>*
store <4 x float> %5, <4 x float>* %6, align 4
%index.next = add i64 %index, 4
%7 = icmp eq i64 %index.next, %n.vec
br i1 %7, label %middle.block, label %vector.body
; After the vector loop: finished if the vector loop covered every element,
; otherwise continue with the scalar loop for the remainder.
middle.block: ; preds = %vector.body
%cmp.n = icmp eq i64 %n.vec, %wide.trip.count
br i1 %cmp.n, label %for.end, label %for.body.preheader18
; Select the scalar loop's starting index: 0 when the vector loop was skipped,
; %n.vec when resuming after it.
for.body.preheader18: ; preds = %middle.block, %vector.memcheck, %for.body.preheader
%indvars.iv.ph = phi i64 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
br label %for.body
; Scalar loop: one element per iteration, p[i] = p[i] / q[i], until i reaches
; the full count.
for.body: ; preds = %for.body.preheader18, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader18 ]
%arrayidx = getelementptr inbounds float, float* %call1, i64 %indvars.iv
%8 = load float, float* %arrayidx, align 4
%arrayidx3 = getelementptr inbounds float, float* %0, i64 %indvars.iv
%9 = load float, float* %arrayidx3, align 4
%div = fdiv reassoc nsz arcp afn float %9, %8
store float %div, float* %arrayidx3, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond.not, label %for.end, label %for.body
; Common exit for the empty, fully vectorized, and scalar paths.
for.end: ; preds = %for.body, %middle.block, %entry
ret void
}
; External callees used by @d. Both are declared with a variadic (...)
; signature, and the call sites in @d bitcast them to zero-argument function
; types before calling. @c returns a sign-extended i32; @b returns a float*.
; NOTE(review): attribute groups #1 and #2 are referenced in this file but no
; definition for them is visible here - confirm whether that is intentional.
declare signext i32 @c(...) local_unnamed_addr #1
declare float* @b(...) local_unnamed_addr #1
; Attribute group #0 is the one attached to the definition of @d.
attributes #0 = { nounwind }