mflr is relatively expensive on Power processors older than Power10, so the store of the value defined by mflr should be scheduled away from the mflr itself. In the epilogue, the expensive mtlr has no user of its def, so it does not matter that the load and the mtlr are back-to-back. Reviewed By: RolandF Differential Revision: https://reviews.llvm.org/D137423
181 lines
6.9 KiB
LLVM
181 lines
6.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=powerpc64-unknown-freebsd13.0 -verify-machineinstrs \
|
|
; RUN: -mcpu=ppc64 -ppc-asm-full-reg-names < %s | FileCheck %s
|
|
; Global pointer to float, initialised to null. @d loads it once on entry and
; uses the loaded pointer as the array that is divided and written back.
@a = local_unnamed_addr global float* null, align 8
|
|
|
|
; Function Attrs: nounwind
|
|
; @d divides, in place, each float reachable through the pointer loaded from @a
; by the matching float reachable through the pointer returned by @b. The
; element count is the i32 returned by @c; nothing is done when it is <= 0.
; The IR is already vectorized: a <4 x float> loop guarded by a runtime overlap
; test, plus a scalar loop used for counts below 4, for overlapping ranges, and
; for the elements left over after the vector loop.
; The FileCheck assertions that follow pin the complete expected assembly. In
; the prologue, two other instructions sit between "mflr r0" and the store of
; r0; in the epilogue, the reload of r0 is immediately followed by "mtlr r0".
define void @d() local_unnamed_addr #0 {
|
|
; CHECK-LABEL: d:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: mflr r0
|
|
; CHECK-NEXT: stdu r1, -208(r1)
|
|
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
|
|
; CHECK-NEXT: std r0, 224(r1)
|
|
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
|
|
; CHECK-NEXT: std r29, 184(r1) # 8-byte Folded Spill
|
|
; CHECK-NEXT: std r30, 192(r1) # 8-byte Folded Spill
|
|
; CHECK-NEXT: ld r29, 0(r3)
|
|
; CHECK-NEXT: bl c
|
|
; CHECK-NEXT: nop
|
|
; CHECK-NEXT: mr r30, r3
|
|
; CHECK-NEXT: bl b
|
|
; CHECK-NEXT: nop
|
|
; CHECK-NEXT: cmpwi r30, 0
|
|
; CHECK-NEXT: ble cr0, .LBB0_9
|
|
; CHECK-NEXT: # %bb.1: # %for.body.preheader
|
|
; CHECK-NEXT: cmplwi r30, 4
|
|
; CHECK-NEXT: clrldi r4, r30, 32
|
|
; CHECK-NEXT: li r5, 0
|
|
; CHECK-NEXT: blt cr0, .LBB0_7
|
|
; CHECK-NEXT: # %bb.2: # %vector.memcheck
|
|
; CHECK-NEXT: rldic r6, r30, 2, 30
|
|
; CHECK-NEXT: add r7, r3, r6
|
|
; CHECK-NEXT: cmpld r29, r7
|
|
; CHECK-NEXT: add r6, r29, r6
|
|
; CHECK-NEXT: bc 4, lt, .LBB0_4
|
|
; CHECK-NEXT: # %bb.3: # %vector.memcheck
|
|
; CHECK-NEXT: cmpld r3, r6
|
|
; CHECK-NEXT: bc 12, lt, .LBB0_7
|
|
; CHECK-NEXT: .LBB0_4: # %vector.ph
|
|
; CHECK-NEXT: rlwinm r5, r4, 0, 0, 29
|
|
; CHECK-NEXT: li r7, 15
|
|
; CHECK-NEXT: addi r6, r5, -4
|
|
; CHECK-NEXT: addi r8, r1, 144
|
|
; CHECK-NEXT: rldicl r6, r6, 62, 2
|
|
; CHECK-NEXT: addi r9, r1, 128
|
|
; CHECK-NEXT: addi r6, r6, 1
|
|
; CHECK-NEXT: addi r10, r1, 160
|
|
; CHECK-NEXT: mtctr r6
|
|
; CHECK-NEXT: li r6, 0
|
|
; CHECK-NEXT: addi r11, r1, 112
|
|
; CHECK-NEXT: .LBB0_5: # %vector.body
|
|
; CHECK-NEXT: #
|
|
; CHECK-NEXT: add r12, r3, r6
|
|
; CHECK-NEXT: lvx v3, r3, r6
|
|
; CHECK-NEXT: lvx v5, r12, r7
|
|
; CHECK-NEXT: add r12, r29, r6
|
|
; CHECK-NEXT: lvsl v2, r3, r6
|
|
; CHECK-NEXT: vperm v2, v3, v5, v2
|
|
; CHECK-NEXT: lvx v3, r29, r6
|
|
; CHECK-NEXT: lvx v5, r12, r7
|
|
; CHECK-NEXT: lvsl v4, r29, r6
|
|
; CHECK-NEXT: stvx v2, 0, r8
|
|
; CHECK-NEXT: vperm v2, v3, v5, v4
|
|
; CHECK-NEXT: stvx v2, 0, r9
|
|
; CHECK-NEXT: lfs f0, 156(r1)
|
|
; CHECK-NEXT: lfs f1, 140(r1)
|
|
; CHECK-NEXT: fdivs f0, f1, f0
|
|
; CHECK-NEXT: lfs f1, 136(r1)
|
|
; CHECK-NEXT: stfs f0, 172(r1)
|
|
; CHECK-NEXT: lfs f0, 152(r1)
|
|
; CHECK-NEXT: fdivs f0, f1, f0
|
|
; CHECK-NEXT: lfs f1, 132(r1)
|
|
; CHECK-NEXT: stfs f0, 168(r1)
|
|
; CHECK-NEXT: lfs f0, 148(r1)
|
|
; CHECK-NEXT: fdivs f0, f1, f0
|
|
; CHECK-NEXT: lfs f1, 128(r1)
|
|
; CHECK-NEXT: stfs f0, 164(r1)
|
|
; CHECK-NEXT: lfs f0, 144(r1)
|
|
; CHECK-NEXT: fdivs f0, f1, f0
|
|
; CHECK-NEXT: stfs f0, 160(r1)
|
|
; CHECK-NEXT: lvx v2, 0, r10
|
|
; CHECK-NEXT: stvx v2, 0, r11
|
|
; CHECK-NEXT: ld r0, 112(r1)
|
|
; CHECK-NEXT: stdx r0, r29, r6
|
|
; CHECK-NEXT: addi r6, r6, 16
|
|
; CHECK-NEXT: ld r0, 120(r1)
|
|
; CHECK-NEXT: std r0, 8(r12)
|
|
; CHECK-NEXT: bdnz .LBB0_5
|
|
; CHECK-NEXT: # %bb.6: # %middle.block
|
|
; CHECK-NEXT: cmpld r5, r4
|
|
; CHECK-NEXT: beq cr0, .LBB0_9
|
|
; CHECK-NEXT: .LBB0_7: # %for.body.preheader18
|
|
; CHECK-NEXT: sldi r6, r5, 2
|
|
; CHECK-NEXT: sub r5, r4, r5
|
|
; CHECK-NEXT: addi r6, r6, -4
|
|
; CHECK-NEXT: add r3, r3, r6
|
|
; CHECK-NEXT: add r4, r29, r6
|
|
; CHECK-NEXT: mtctr r5
|
|
; CHECK-NEXT: .LBB0_8: # %for.body
|
|
; CHECK-NEXT: #
|
|
; CHECK-NEXT: lfsu f0, 4(r4)
|
|
; CHECK-NEXT: lfsu f1, 4(r3)
|
|
; CHECK-NEXT: fdivs f0, f0, f1
|
|
; CHECK-NEXT: stfs f0, 0(r4)
|
|
; CHECK-NEXT: bdnz .LBB0_8
|
|
; Expected epilogue: reload r30/r29, pop the 208-byte frame, then reload r0
; and move it to LR back-to-back before returning.
; CHECK-NEXT: .LBB0_9: # %for.end
|
|
; CHECK-NEXT: ld r30, 192(r1) # 8-byte Folded Reload
|
|
; CHECK-NEXT: ld r29, 184(r1) # 8-byte Folded Reload
|
|
; CHECK-NEXT: addi r1, r1, 208
|
|
; CHECK-NEXT: ld r0, 16(r1)
|
|
; CHECK-NEXT: mtlr r0
|
|
; CHECK-NEXT: blr
|
|
; Load the pointer held in @a, then call @c (element count) and @b (divisor
; pointer); go straight to the exit when the count is not positive.
entry:
|
|
%0 = load float*, float** @a, align 8
|
|
%call = call signext i32 bitcast (i32 (...)* @c to i32 ()*)() #2
|
|
%call1 = call float* bitcast (float* (...)* @b to float* ()*)() #2
|
|
%cmp11 = icmp sgt i32 %call, 0
|
|
br i1 %cmp11, label %for.body.preheader, label %for.end
|
|
|
|
; Zero-extend the count to i64; counts below 4 skip the vector path entirely.
for.body.preheader: ; preds = %entry
|
|
%wide.trip.count = zext i32 %call to i64
|
|
%min.iters.check = icmp ult i32 %call, 4
|
|
br i1 %min.iters.check, label %for.body.preheader18, label %vector.memcheck
|
|
|
|
; Runtime overlap test on the two count-element ranges: each base is compared
; against the other range's end, and the scalar loop is used when both
; comparisons hold.
vector.memcheck: ; preds = %for.body.preheader
|
|
%scevgep = getelementptr float, float* %0, i64 %wide.trip.count
|
|
%scevgep15 = getelementptr float, float* %call1, i64 %wide.trip.count
|
|
%bound0 = icmp ult float* %0, %scevgep15
|
|
%bound1 = icmp ult float* %call1, %scevgep
|
|
%found.conflict = and i1 %bound0, %bound1
|
|
br i1 %found.conflict, label %for.body.preheader18, label %vector.ph
|
|
|
|
; %n.vec is the count with its two low bits cleared (4294967292 = 0xFFFFFFFC),
; i.e. the number of elements the 4-wide loop will handle.
vector.ph: ; preds = %vector.memcheck
|
|
%n.vec = and i64 %wide.trip.count, 4294967292
|
|
br label %vector.body
|
|
|
|
; Four floats per iteration: load one vector from each pointer (align 4 only),
; divide the @a-side vector by the @b-side vector, store the quotient back
; through the @a-side address.
vector.body: ; preds = %vector.body, %vector.ph
|
|
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
|
|
%1 = getelementptr inbounds float, float* %call1, i64 %index
|
|
%2 = bitcast float* %1 to <4 x float>*
|
|
%wide.load = load <4 x float>, <4 x float>* %2, align 4
|
|
%3 = getelementptr inbounds float, float* %0, i64 %index
|
|
%4 = bitcast float* %3 to <4 x float>*
|
|
%wide.load17 = load <4 x float>, <4 x float>* %4, align 4
|
|
%5 = fdiv reassoc nsz arcp afn <4 x float> %wide.load17, %wide.load
|
|
%6 = bitcast float* %3 to <4 x float>*
|
|
store <4 x float> %5, <4 x float>* %6, align 4
|
|
%index.next = add i64 %index, 4
|
|
%7 = icmp eq i64 %index.next, %n.vec
|
|
br i1 %7, label %middle.block, label %vector.body
|
|
|
|
; Finished if the vector loop covered every element; otherwise the scalar loop
; handles the remainder.
middle.block: ; preds = %vector.body
|
|
%cmp.n = icmp eq i64 %n.vec, %wide.trip.count
|
|
br i1 %cmp.n, label %for.end, label %for.body.preheader18
|
|
|
|
; Scalar loop start index: 0 when the vector loop was skipped, %n.vec after it.
for.body.preheader18: ; preds = %middle.block, %vector.memcheck, %for.body.preheader
|
|
%indvars.iv.ph = phi i64 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
|
|
br label %for.body
|
|
|
|
; One element per iteration, with the same operand order and in-place store as
; the vector loop.
for.body: ; preds = %for.body.preheader18, %for.body
|
|
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader18 ]
|
|
%arrayidx = getelementptr inbounds float, float* %call1, i64 %indvars.iv
|
|
%8 = load float, float* %arrayidx, align 4
|
|
%arrayidx3 = getelementptr inbounds float, float* %0, i64 %indvars.iv
|
|
%9 = load float, float* %arrayidx3, align 4
|
|
%div = fdiv reassoc nsz arcp afn float %9, %8
|
|
store float %div, float* %arrayidx3, align 4
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
|
br i1 %exitcond.not, label %for.end, label %for.body
|
|
|
|
; Common exit for the empty, fully vectorized and scalar paths.
for.end: ; preds = %for.body, %middle.block, %entry
|
|
ret void
|
|
}
|
|
|
|
; External variadic function returning a sign-extended i32. @d calls it with no
; arguments (through a bitcast to i32 ()) and uses the result as the element
; count. Attribute group #1 is not defined in the visible part of the file.
declare signext i32 @c(...) local_unnamed_addr #1
|
|
|
|
; External variadic function returning a float pointer. @d calls it with no
; arguments (through a bitcast to float* ()) and reads the divisor elements
; through the returned pointer. Attribute group #1 is not defined in the
; visible part of the file.
declare float* @b(...) local_unnamed_addr #1
|
|
|
|
; Attribute group #0, referenced by the definition of @d: marks the function
; as nounwind.
attributes #0 = { nounwind }
|