; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
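; To regenerate the CHECK lines below, rerun utils/update_llc_test_checks.py on this file.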
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 < %s -o -| FileCheck %s

declare half @llvm.fabs.f16(half)
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare fp128 @llvm.fabs.f128(fp128)

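; Each function below implements isinf(x) as fabs(x) followed by an equality
; compare against +infinity, and checks how the infinity constant is
; materialized for each floating-point width.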
; Check that INFINITY for _Float16 is materialized
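; 0x7C00 is the IEEE-754 binary16 bit pattern for +infinity; it fits in a
; single 16-bit mov immediate, so it is built with mov+fmov instead of a load.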
define i32 @replace_isinf_call_f16(half %x) {
; CHECK-LABEL: replace_isinf_call_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #31744 // =0x7c00
; CHECK-NEXT:    fabs h0, h0
; CHECK-NEXT:    fmov h1, w8
; CHECK-NEXT:    fcmp h0, h1
; CHECK-NEXT:    cset w0, eq
; CHECK-NEXT:    ret
  %abs = tail call half @llvm.fabs.f16(half %x)
  %cmpinf = fcmp oeq half %abs, 0xH7C00
  %ret = zext i1 %cmpinf to i32
  ret i32 %ret
}

; Check that INFINITY for float is materialized
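; 0x7F800000 is the binary32 bit pattern for +infinity, materialized with
; mov+fmov rather than loaded from a constant pool.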
define i32 @replace_isinf_call_f32(float %x) {
; CHECK-LABEL: replace_isinf_call_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fabs s0, s0
; CHECK-NEXT:    mov w8, #2139095040 // =0x7f800000
; CHECK-NEXT:    fmov s1, w8
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    cset w0, eq
; CHECK-NEXT:    ret
  %abs = tail call float @llvm.fabs.f32(float %x)
  %cmpinf = fcmp oeq float %abs, 0x7FF0000000000000
  %ret = zext i1 %cmpinf to i32
  ret i32 %ret
}

; Check that INFINITY for double is materialized
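; 0x7FF0000000000000 is the binary64 bit pattern for +infinity; it is built
; with a single mov immediate into x8 plus an fmov, avoiding a memory load.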
define i32 @replace_isinf_call_f64(double %x) {
; CHECK-LABEL: replace_isinf_call_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fabs d0, d0
; CHECK-NEXT:    mov x8, #9218868437227405312 // =0x7ff0000000000000
; CHECK-NEXT:    fmov d1, x8
; CHECK-NEXT:    fcmp d0, d1
; CHECK-NEXT:    cset w0, eq
; CHECK-NEXT:    ret
  %abs = tail call double @llvm.fabs.f64(double %x)
  %cmpinf = fcmp oeq double %abs, 0x7FF0000000000000
  %ret = zext i1 %cmpinf to i32
  ret i32 %ret
}

; For long double (fp128), the infinity constant still has to be loaded from
; the constant pool.
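; There is no native fp128 support here: fabs is performed by clearing the
; sign bit of the in-memory copy (the and with 0x7f on the top byte), the
; +infinity constant comes from the constant pool (.LCPI3_0), and the
; comparison is a libcall to __eqtf2.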
define i32 @replace_isinf_call_f128(fp128 %x) {
; CHECK-LABEL: replace_isinf_call_f128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #32
; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    str q0, [sp]
; CHECK-NEXT:    ldrb w8, [sp, #15]
; CHECK-NEXT:    and w8, w8, #0x7f
; CHECK-NEXT:    strb w8, [sp, #15]
; CHECK-NEXT:    adrp x8, .LCPI3_0
; CHECK-NEXT:    ldr q0, [sp]
; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT:    bl __eqtf2
; CHECK-NEXT:    cmp w0, #0
; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT:    cset w0, eq
; CHECK-NEXT:    add sp, sp, #32
; CHECK-NEXT:    ret
  %abs = tail call fp128 @llvm.fabs.f128(fp128 %x)
  %cmpinf = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
  %ret = zext i1 %cmpinf to i32
  ret i32 %ret
}