The SVE 'fscale' instruction is equivalent to the math library call ldexp, but performs better. This patch lowers ldexp to fscale when SVE is enabled.
64 lines
1.9 KiB
LLVM
64 lines
1.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck %s
|
|
|
|
; Double-precision case: a `fast` tail call to @ldexp(double, i32).
; With +sve the expected code contains no call; instead it uses a predicated
; SVE fscale on .d lanes. The i32 exponent is sign-extended to 64 bits (sxtw)
; and moved into d1 (fmov) so that it is available as the z1.d operand.
define double @testExp(double %val, i32 %a) {
; CHECK-LABEL: testExp:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sxtw x8, w0
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    fmov d1, x8
; CHECK-NEXT:    fscale z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
entry:
  %call = tail call fast double @ldexp(double %val, i32 %a)
  ret double %call
}
|
|
|
|
; External declaration of ldexp (double value, i32 exponent) called by
; @testExp; memory(none) states that the callee does not access memory.
declare double @ldexp(double, i32) memory(none)
|
|
|
|
; Single-precision case: a `fast` tail call to @ldexpf(float, i32).
; The expected code uses a predicated SVE fscale on .s lanes. The exponent
; already matches the 32-bit element size, so it is moved straight from w0
; into s1 (fmov) with no extension step.
define float @testExpf(float %val, i32 %a) {
; CHECK-LABEL: testExpf:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmov s1, w0
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    fscale z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
entry:
  %call = tail call fast float @ldexpf(float %val, i32 %a)
  ret float %call
}
|
|
|
|
; External declaration of ldexpf (float value, i32 exponent) called by
; @testExpf; memory(none) states that the callee does not access memory.
declare float @ldexpf(float, i32) memory(none)
|
|
|
|
; Quad-precision (fp128) case: a `fast` tail call to @ldexpl(fp128, i32).
; No fscale is expected here; the assertions pin a plain tail call to the
; library routine (`b ldexpl`), i.e. this type is left as a libcall.
define fp128 @testExpl(fp128 %val, i32 %a) {
; CHECK-LABEL: testExpl:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    b ldexpl
entry:
  %call = tail call fast fp128 @ldexpl(fp128 %val, i32 %a)
  ret fp128 %call
}
|
|
|
|
; External declaration of ldexpl (fp128 value, i32 exponent) called by
; @testExpl; memory(none) states that the callee does not access memory.
declare fp128 @ldexpl(fp128, i32) memory(none)
|
|
|
|
; Half-precision case, expressed through the @llvm.ldexp.f16.i32 intrinsic
; rather than a library call. The expected code widens the value to single
; precision (fcvt s0, h0), applies a predicated SVE fscale on .s lanes with
; the exponent moved from w0 into s1, then narrows the result back to half
; (fcvt h0, s0).
define half @testExpf16(half %val, i32 %a) {
; CHECK-LABEL: testExpf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fcvt s0, h0
; CHECK-NEXT:    fmov s1, w0
; CHECK-NEXT:    fscale z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    fcvt h0, s0
; CHECK-NEXT:    ret
entry:
  %0 = tail call fast half @llvm.ldexp.f16.i32(half %val, i32 %a)
  ret half %0
}
|
|
|
|
; Declaration of the llvm.ldexp intrinsic overload taking a half value and an
; i32 exponent, used by @testExpf16; memory(none) states that it does not
; access memory.
declare half @llvm.ldexp.f16.i32(half, i32) memory(none)
|