mflr is fairly expensive on Power versions earlier than 10, so the store of the value defined by mflr should be scheduled away from the mflr itself. In the epilogue, the result of the expensive mtlr has no user, so it does not matter that the load and the mtlr are back-to-back. Reviewed By: RolandF Differential Revision: https://reviews.llvm.org/D137423
40 lines
1.3 KiB
LLVM
40 lines
1.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
|
|
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
|
|
; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
|
|
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
|
|
; RUN: -check-prefix=CHECK-P9
|
|
|
|
; @test2 adds its two i64 arguments, truncates the sum to i16, adds 11, and
; returns the resulting 16 bits reinterpreted as a half.
;
; Two sets of expectations follow the function header:
;   * the default-prefix set (pwr8 invocation) converts the value through a
;     call to __gnu_h2f_ieee, so it saves and restores the link register; the
;     save "std r0, 48(r1)" is not adjacent to "mflr r0" (stdu and add sit in
;     between), while the restore "ld r0, 16(r1)" is directly followed by
;     "mtlr r0";
;   * the P9-prefix set (pwr9 invocation) converts inline with
;     mtfprwz + xscvhpdp and makes no call.
define dso_local half @test2(i64 %a, i64 %b) local_unnamed_addr #0 {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: add r3, r4, r3
; CHECK-NEXT: std r0, 48(r1)
; CHECK-NEXT: addi r3, r3, 11
; CHECK-NEXT: clrlwi r3, r3, 16
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P9-LABEL: test2:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: add r3, r4, r3
; CHECK-P9-NEXT: addi r3, r3, 11
; CHECK-P9-NEXT: clrlwi r3, r3, 16
; CHECK-P9-NEXT: mtfprwz f0, r3
; CHECK-P9-NEXT: xscvhpdp f1, f0
; CHECK-P9-NEXT: blr
entry:
  ; 64-bit sum of the arguments (operand order %b, %a is kept as written).
  %sum = add i64 %b, %a
  ; Keep only the low 16 bits of the sum.
  %low16 = trunc i64 %sum to i16
  ; Offset the 16-bit value by the constant 11.
  %bits = add i16 %low16, 11
  ; Reinterpret the 16-bit integer pattern as a half; no numeric conversion.
  %result = bitcast i16 %bits to half
  ret half %result
}
|
|
; Attribute group #0, referenced by the definition of @test2.
attributes #0 = { nounwind }
|