Refresh the generic scheduling model to use A510 instead of A55. The main benefits are for the little core and the introduction of SVE scheduling information. The changes were tested on various out-of-order (OoO) cores, and no performance degradation was seen. Differential Revision: https://reviews.llvm.org/D156799
263 lines
9.3 KiB
LLVM
263 lines
9.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
; RUN: llc < %s -mtriple=aarch64-windows | FileCheck %s -check-prefix=CHECK-WIN
; RUN: llc < %s -mtriple=aarch64-apple-darwin | FileCheck %s -check-prefix=CHECK-DARWIN

; The Windows runtime doesn't have these.
; CHECK-WIN-NOT: __ashlti3
; CHECK-WIN-NOT: __ashrti3

; Darwin compiler-rt excludes these.
; CHECK-DARWIN-NOT: __ashlti3
; CHECK-DARWIN-NOT: __ashrti3

; f0 - variable 64-bit left shift under minsize/optsize.
; All three RUN configurations expect a single `lsl` and no runtime call.
define i64 @f0(i64 %val, i64 %amt) minsize optsize {
; CHECK-LABEL: f0:
; CHECK: // %bb.0:
; CHECK-NEXT: lsl x0, x0, x1
; CHECK-NEXT: ret
;
; CHECK-WIN-LABEL: f0:
; CHECK-WIN: // %bb.0:
; CHECK-WIN-NEXT: lsl x0, x0, x1
; CHECK-WIN-NEXT: ret
;
; CHECK-DARWIN-LABEL: f0:
; CHECK-DARWIN: ; %bb.0:
; CHECK-DARWIN-NEXT: lsl x0, x0, x1
; CHECK-DARWIN-NEXT: ret
  %res = shl i64 %val, %amt
  ret i64 %res
}

; f1 - variable 64-bit left shift whose result is truncated to i32.
; The expected output performs the shift on the full x-register and returns
; the low 32 bits via w0 (the "kill" line is an assembler comment emitted by
; llc, not an instruction).
define i32 @f1(i64 %x, i64 %y) minsize optsize {
; CHECK-LABEL: f1:
; CHECK: // %bb.0:
; CHECK-NEXT: lsl x0, x0, x1
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
;
; CHECK-WIN-LABEL: f1:
; CHECK-WIN: // %bb.0:
; CHECK-WIN-NEXT: lsl x0, x0, x1
; CHECK-WIN-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-WIN-NEXT: ret
;
; CHECK-DARWIN-LABEL: f1:
; CHECK-DARWIN: ; %bb.0:
; CHECK-DARWIN-NEXT: lsl x0, x0, x1
; CHECK-DARWIN-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-DARWIN-NEXT: ret
  %a = shl i64 %x, %y
  %b = trunc i64 %a to i32
  ret i32 %b
}

; f2 - variable 64-bit arithmetic right shift whose result is truncated to i32.
; All three RUN configurations expect a single `asr` on the x-register, with
; the result read back through w0.
define i32 @f2(i64 %x, i64 %y) minsize optsize {
; CHECK-LABEL: f2:
; CHECK: // %bb.0:
; CHECK-NEXT: asr x0, x0, x1
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
;
; CHECK-WIN-LABEL: f2:
; CHECK-WIN: // %bb.0:
; CHECK-WIN-NEXT: asr x0, x0, x1
; CHECK-WIN-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-WIN-NEXT: ret
;
; CHECK-DARWIN-LABEL: f2:
; CHECK-DARWIN: ; %bb.0:
; CHECK-DARWIN-NEXT: asr x0, x0, x1
; CHECK-DARWIN-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-DARWIN-NEXT: ret
  %a = ashr i64 %x, %y
  %b = trunc i64 %a to i32
  ret i32 %b
}

; f3 - variable 64-bit logical right shift whose result is truncated to i32.
; All three RUN configurations expect a single `lsr` on the x-register, with
; the result read back through w0.
define i32 @f3(i64 %x, i64 %y) minsize optsize {
; CHECK-LABEL: f3:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x0, x0, x1
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
;
; CHECK-WIN-LABEL: f3:
; CHECK-WIN: // %bb.0:
; CHECK-WIN-NEXT: lsr x0, x0, x1
; CHECK-WIN-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-WIN-NEXT: ret
;
; CHECK-DARWIN-LABEL: f3:
; CHECK-DARWIN: ; %bb.0:
; CHECK-DARWIN-NEXT: lsr x0, x0, x1
; CHECK-DARWIN-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-DARWIN-NEXT: ret
  %a = lshr i64 %x, %y
  %b = trunc i64 %a to i32
  ret i32 %b
}

; shl128 - 128-bit left shift by a variable amount, with the operand passed
; and the result returned as two i64 halves.
;
; Expected lowering per RUN configuration:
;   * aarch64-unknown-unknown: a call to the __ashlti3 runtime helper
;     (x30 is spilled/reloaded around the `bl`).
;   * aarch64-windows and aarch64-apple-darwin: an inline lsl/lsr/orr/csel
;     expansion with no call, since those runtimes do not provide the helper
;     (see the comments at the top of this file).
define dso_local { i64, i64 } @shl128(i64 %x.coerce0, i64 %x.coerce1, i8 signext %y) minsize optsize {
; CHECK-LABEL: shl128:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: bl __ashlti3
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-WIN-LABEL: shl128:
; CHECK-WIN: // %bb.0: // %entry
; CHECK-WIN-NEXT: lsr x8, x0, #1
; CHECK-WIN-NEXT: mvn w9, w2
; CHECK-WIN-NEXT: mov w10, w2
; CHECK-WIN-NEXT: lsl x11, x0, x10
; CHECK-WIN-NEXT: tst x10, #0x40
; CHECK-WIN-NEXT: lsr x8, x8, x9
; CHECK-WIN-NEXT: lsl x9, x1, x10
; CHECK-WIN-NEXT: csel x0, xzr, x11, ne
; CHECK-WIN-NEXT: orr x8, x9, x8
; CHECK-WIN-NEXT: csel x1, x11, x8, ne
; CHECK-WIN-NEXT: ret
;
; CHECK-DARWIN-LABEL: shl128:
; CHECK-DARWIN: ; %bb.0: ; %entry
; CHECK-DARWIN-NEXT: lsr x8, x0, #1
; CHECK-DARWIN-NEXT: mvn w9, w2
; CHECK-DARWIN-NEXT: mov w10, w2
; CHECK-DARWIN-NEXT: lsl x11, x0, x10
; CHECK-DARWIN-NEXT: tst x10, #0x40
; CHECK-DARWIN-NEXT: lsr x8, x8, x9
; CHECK-DARWIN-NEXT: lsl x9, x1, x10
; CHECK-DARWIN-NEXT: csel x0, xzr, x11, ne
; CHECK-DARWIN-NEXT: orr x8, x9, x8
; CHECK-DARWIN-NEXT: csel x1, x11, x8, ne
; CHECK-DARWIN-NEXT: ret
entry:
  ; Reassemble the i128 operand: %x.coerce1 is the high half, %x.coerce0 the low half.
  %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
  %x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64
  %x.sroa.0.0.insert.ext = zext i64 %x.coerce0 to i128
  %x.sroa.0.0.insert.insert = or i128 %x.sroa.2.0.insert.shift, %x.sroa.0.0.insert.ext
  ; Shift amount: the i8 argument sign-extended to i32, then zero-extended to i128.
  %conv = sext i8 %y to i32
  %sh_prom = zext i32 %conv to i128
  %shl = shl i128 %x.sroa.0.0.insert.insert, %sh_prom
  ; Split the i128 result back into { low, high }.
  %retval.sroa.0.0.extract.trunc = trunc i128 %shl to i64
  %retval.sroa.2.0.extract.shift = lshr i128 %shl, 64
  %retval.sroa.2.0.extract.trunc = trunc i128 %retval.sroa.2.0.extract.shift to i64
  ; Both fields of the undef aggregate are overwritten before it is returned.
  %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.extract.trunc, 0
  %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1
  ret { i64, i64 } %.fca.1.insert
}

; ashr128 - 128-bit arithmetic right shift by a variable amount, with the
; operand passed and the result returned as two i64 halves.
;
; Expected lowering per RUN configuration:
;   * aarch64-unknown-unknown: a call to the __ashrti3 runtime helper
;     (x30 is spilled/reloaded around the `bl`).
;   * aarch64-windows and aarch64-apple-darwin: an inline expansion with no
;     call; `asr x12, x1, #63` supplies the sign fill that the final csel
;     picks for the high half when bit 6 of the shift amount is set.
define dso_local { i64, i64 } @ashr128(i64 %x.coerce0, i64 %x.coerce1, i8 signext %y) minsize optsize {
; CHECK-LABEL: ashr128:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: bl __ashrti3
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-WIN-LABEL: ashr128:
; CHECK-WIN: // %bb.0: // %entry
; CHECK-WIN-NEXT: lsl x8, x1, #1
; CHECK-WIN-NEXT: mov w9, w2
; CHECK-WIN-NEXT: mvn w10, w2
; CHECK-WIN-NEXT: lsr x11, x0, x9
; CHECK-WIN-NEXT: asr x12, x1, #63
; CHECK-WIN-NEXT: tst x9, #0x40
; CHECK-WIN-NEXT: lsl x8, x8, x10
; CHECK-WIN-NEXT: asr x10, x1, x9
; CHECK-WIN-NEXT: orr x8, x8, x11
; CHECK-WIN-NEXT: csel x1, x12, x10, ne
; CHECK-WIN-NEXT: csel x0, x10, x8, ne
; CHECK-WIN-NEXT: ret
;
; CHECK-DARWIN-LABEL: ashr128:
; CHECK-DARWIN: ; %bb.0: ; %entry
; CHECK-DARWIN-NEXT: lsl x8, x1, #1
; CHECK-DARWIN-NEXT: mov w9, w2
; CHECK-DARWIN-NEXT: mvn w10, w2
; CHECK-DARWIN-NEXT: lsr x11, x0, x9
; CHECK-DARWIN-NEXT: asr x12, x1, #63
; CHECK-DARWIN-NEXT: tst x9, #0x40
; CHECK-DARWIN-NEXT: lsl x8, x8, x10
; CHECK-DARWIN-NEXT: asr x10, x1, x9
; CHECK-DARWIN-NEXT: orr x8, x8, x11
; CHECK-DARWIN-NEXT: csel x1, x12, x10, ne
; CHECK-DARWIN-NEXT: csel x0, x10, x8, ne
; CHECK-DARWIN-NEXT: ret
entry:
  ; Reassemble the i128 operand: %x.coerce1 is the high half, %x.coerce0 the low half.
  %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
  %x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64
  %x.sroa.0.0.insert.ext = zext i64 %x.coerce0 to i128
  %x.sroa.0.0.insert.insert = or i128 %x.sroa.2.0.insert.shift, %x.sroa.0.0.insert.ext
  ; Shift amount: the i8 argument sign-extended to i32, then zero-extended to i128.
  %conv = sext i8 %y to i32
  %sh_prom = zext i32 %conv to i128
  %shr = ashr i128 %x.sroa.0.0.insert.insert, %sh_prom
  ; Split the i128 result back into { low, high }; the high half is extracted
  ; with a logical shift by 64 followed by a truncate.
  %retval.sroa.0.0.extract.trunc = trunc i128 %shr to i64
  %retval.sroa.2.0.extract.shift = lshr i128 %shr, 64
  %retval.sroa.2.0.extract.trunc = trunc i128 %retval.sroa.2.0.extract.shift to i64
  ; Both fields of the undef aggregate are overwritten before it is returned.
  %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.extract.trunc, 0
  %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1
  ret { i64, i64 } %.fca.1.insert
}

; lshr128 - 128-bit logical right shift by a variable amount, with the
; operand passed and the result returned as two i64 halves.
;
; Expected lowering per RUN configuration:
;   * aarch64-unknown-unknown: a call to the __lshrti3 runtime helper
;     (x30 is spilled/reloaded around the `bl`).
;   * aarch64-windows and aarch64-apple-darwin: an inline expansion with no
;     call; the high half becomes xzr when bit 6 of the shift amount is set.
;
; NOTE(review): the negative checks at the top of this file name only
; __ashlti3 and __ashrti3; __lshrti3 is not listed there. The exact
; instruction sequences below still rule out a call on those targets, but
; confirm whether the omission is intentional.
define dso_local { i64, i64 } @lshr128(i64 %x.coerce0, i64 %x.coerce1, i8 signext %y) minsize optsize {
; CHECK-LABEL: lshr128:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: bl __lshrti3
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-WIN-LABEL: lshr128:
; CHECK-WIN: // %bb.0: // %entry
; CHECK-WIN-NEXT: lsl x8, x1, #1
; CHECK-WIN-NEXT: mov w9, w2
; CHECK-WIN-NEXT: mvn w10, w2
; CHECK-WIN-NEXT: lsr x11, x0, x9
; CHECK-WIN-NEXT: tst x9, #0x40
; CHECK-WIN-NEXT: lsl x8, x8, x10
; CHECK-WIN-NEXT: lsr x10, x1, x9
; CHECK-WIN-NEXT: orr x8, x8, x11
; CHECK-WIN-NEXT: csel x1, xzr, x10, ne
; CHECK-WIN-NEXT: csel x0, x10, x8, ne
; CHECK-WIN-NEXT: ret
;
; CHECK-DARWIN-LABEL: lshr128:
; CHECK-DARWIN: ; %bb.0: ; %entry
; CHECK-DARWIN-NEXT: lsl x8, x1, #1
; CHECK-DARWIN-NEXT: mov w9, w2
; CHECK-DARWIN-NEXT: mvn w10, w2
; CHECK-DARWIN-NEXT: lsr x11, x0, x9
; CHECK-DARWIN-NEXT: tst x9, #0x40
; CHECK-DARWIN-NEXT: lsl x8, x8, x10
; CHECK-DARWIN-NEXT: lsr x10, x1, x9
; CHECK-DARWIN-NEXT: orr x8, x8, x11
; CHECK-DARWIN-NEXT: csel x1, xzr, x10, ne
; CHECK-DARWIN-NEXT: csel x0, x10, x8, ne
; CHECK-DARWIN-NEXT: ret
entry:
  ; Reassemble the i128 operand: %x.coerce1 is the high half, %x.coerce0 the low half.
  %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
  %x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64
  %x.sroa.0.0.insert.ext = zext i64 %x.coerce0 to i128
  %x.sroa.0.0.insert.insert = or i128 %x.sroa.2.0.insert.shift, %x.sroa.0.0.insert.ext
  ; Shift amount: the i8 argument sign-extended to i32, then zero-extended to i128.
  %conv = sext i8 %y to i32
  %sh_prom = zext i32 %conv to i128
  %shr = lshr i128 %x.sroa.0.0.insert.insert, %sh_prom
  ; Split the i128 result back into { low, high }.
  %retval.sroa.0.0.extract.trunc = trunc i128 %shr to i64
  %retval.sroa.2.0.extract.shift = lshr i128 %shr, 64
  %retval.sroa.2.0.extract.trunc = trunc i128 %retval.sroa.2.0.extract.shift to i64
  ; Both fields of the undef aggregate are overwritten before it is returned.
  %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.extract.trunc, 0
  %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1
  ret { i64, i64 } %.fca.1.insert
}