; NOTE(review): the following text appears to be a pasted commit message (D156799,
; about the generic scheduling model / A510) unrelated to this test — likely scrape
; residue; confirm and remove. Kept as a comment so the file still parses:
; "Refresh of the generic scheduling model to use A510 instead of A55. Main benefits
; are to the little core, and introducing SVE scheduling information. Changes tested
; on various OoO cores; no performance degradation is seen."
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
; Scalar i64 add where one operand is extracted from a <1 x i64>: the add should
; stay on the FP/SIMD side (add d0) with a single fmov to move the result out.
define i64 @add_i64_ext_load(<1 x i64> %A, ptr %B) nounwind {
; CHECK-LABEL: add_i64_ext_load:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    add d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %a = extractelement <1 x i64> %A, i32 0
  %b = load i64, ptr %B
  %c = add i64 %a, %b
  ret i64 %c
}
; Same pattern as add_i64_ext_load but for sub: expect a d-register sub rather
; than a round-trip through the integer registers.
define i64 @sub_i64_ext_load(<1 x i64> %A, ptr %B) nounwind {
; CHECK-LABEL: sub_i64_ext_load:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    sub d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %a = extractelement <1 x i64> %A, i32 0
  %b = load i64, ptr %B
  %c = sub i64 %a, %b
  ret i64 %c
}
; Load + add + store of the result: when the value is both produced and consumed
; on the FP/SIMD side, no fmov to the integer registers should be needed at all.
define void @add_i64_ext_load_store(<1 x i64> %A, ptr %B) nounwind {
; CHECK-LABEL: add_i64_ext_load_store:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    add d0, d0, d1
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %a = extractelement <1 x i64> %A, i32 0
  %b = load i64, ptr %B
  %c = add i64 %a, %b
  store i64 %c, ptr %B
  ret void
}
; Extract from a <2 x i64> (a full q-register): here the scalar-FP add trick does
; not apply, so the expected code moves lane 0 out with fmov and adds on the
; integer side.
define i64 @add_v2i64_ext_load(<2 x i64> %A, ptr %B) nounwind {
; CHECK-LABEL: add_v2i64_ext_load:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    ldr x8, [x0]
; CHECK-NEXT:    add x0, x9, x8
; CHECK-NEXT:    ret
  %a = extractelement <2 x i64> %A, i32 0
  %b = load i64, ptr %B
  %c = add i64 %a, %b
  ret i64 %c
}
; Both operands extracted from <1 x i64> vectors: a single d-register add plus
; one fmov for the return value.
define i64 @add_i64_ext_ext(<1 x i64> %A, <1 x i64> %B) nounwind {
; CHECK-LABEL: add_i64_ext_ext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %a = extractelement <1 x i64> %A, i32 0
  %b = extractelement <1 x i64> %B, i32 0
  %c = add i64 %a, %b
  ret i64 %c
}
; i32 variant: there is no scalar 32-bit integer add on the FP/SIMD side, so the
; lane is moved out with fmov (after the implicit d0->q0 sub-register def) and the
; add happens on w-registers.
define i32 @add_i32_ext_load(<1 x i32> %A, ptr %B) nounwind {
; CHECK-LABEL: add_i32_ext_load:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    add w0, w9, w8
; CHECK-NEXT:    ret
  %a = extractelement <1 x i32> %A, i32 0
  %b = load i32, ptr %B
  %c = add i32 %a, %b
  ret i32 %c
}
; Chained adds over both lanes of a <2 x i64>: lane 1 is obtained via ext, and
; both adds stay on d-registers with a single final fmov.
define i64 @add_i64_ext_ext_test1(<1 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-LABEL: add_i64_ext_ext_test1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    add d0, d0, d1
; CHECK-NEXT:    add d0, d0, d2
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %a = extractelement <1 x i64> %A, i32 0
  %b = extractelement <2 x i64> %B, i32 0
  %c = extractelement <2 x i64> %B, i32 1
  %d = add i64 %a, %b
  %e = add i64 %d, %c
  ret i64 %e
}
; Same chained pattern as add_i64_ext_ext_test1 but with sub.
define i64 @sub_i64_ext_ext_test1(<1 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-LABEL: sub_i64_ext_ext_test1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    sub d0, d0, d1
; CHECK-NEXT:    sub d0, d0, d2
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %a = extractelement <1 x i64> %A, i32 0
  %b = extractelement <2 x i64> %B, i32 0
  %c = extractelement <2 x i64> %B, i32 1
  %d = sub i64 %a, %b
  %e = sub i64 %d, %c
  ret i64 %e
}