; (Provenance note) Last touched by: "Refresh of the generic scheduling model
; to use A510 instead of A55. Main benefits are to the little core, and
; introducing SVE scheduling information. Changes tested on various OoO cores,
; no performance degradation is seen."
; Differential Revision: https://reviews.llvm.org/D156799
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

; DAGCombiner fold tests for the signed saturating left-shift intrinsic
; (llvm.sshl.sat), scalar i16 and vector <4 x i16> forms, on AArch64.

declare i16 @llvm.sshl.sat.i16(i16, i16)
declare <4 x i16> @llvm.sshl.sat.v4i16(<4 x i16>, <4 x i16>)

; fold (shlsat undef, x) -> 0
; The whole call folds to the zero constant; no saturating-shift expansion
; is emitted, only a move of wzr into the return register.
define i16 @combine_shl_undef(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shl_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
  %tmp = call i16 @llvm.sshl.sat.i16(i16 undef, i16 %y)
  ret i16 %tmp
}

; fold (shlsat x, undef) -> undef
; The call folds away entirely, so the function body is a bare ret.
define i16 @combine_shl_by_undef(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shl_by_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 undef)
  ret i16 %tmp
}

; fold (shlsat poison, x) -> 0
; Same fold as the undef-value case above: the result is the zero constant.
define i16 @combine_shl_poison(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shl_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
  %tmp = call i16 @llvm.sshl.sat.i16(i16 poison, i16 %y)
  ret i16 %tmp
}

; fold (shlsat x, poison) -> undef
; The call folds away entirely, so the function body is a bare ret.
define i16 @combine_shl_by_poison(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shl_by_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 poison)
  ret i16 %tmp
}

; fold (shlsat x, bitwidth) -> undef
; A shift amount equal to the bit width (16 for i16) folds the call away.
define i16 @combine_shl_by_bitwidth(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shl_by_bitwidth:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 16)
  ret i16 %tmp
}

; fold (shlsat 0, x) -> 0
; Shifting the zero constant yields zero regardless of the shift amount.
define i16 @combine_shl_zero(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shl_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
  %tmp = call i16 @llvm.sshl.sat.i16(i16 0, i16 %y)
  ret i16 %tmp
}

; fold (shlsat x, 0) -> x
; Shift by zero is the identity; %x is already in w0, so only a ret remains.
define i16 @combine_shlsat_by_zero(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shlsat_by_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 0)
  ret i16 %tmp
}

; fold (shlsat c1, c2) -> c3
; Non-saturating constant case: 8 << 2 = 32, folded to an immediate move.
define i16 @combine_shlsat_constfold(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shlsat_constfold:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #32 // =0x20
; CHECK-NEXT: ret
  %tmp = call i16 @llvm.sshl.sat.i16(i16 8, i16 2)
  ret i16 %tmp
}

; fold (shlsat c1, c2) -> sat max
; 8 << 15 overflows i16, so the result saturates to INT16_MAX (0x7fff).
define i16 @combine_shlsat_satmax(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shlsat_satmax:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #32767 // =0x7fff
; CHECK-NEXT: ret
  %tmp = call i16 @llvm.sshl.sat.i16(i16 8, i16 15)
  ret i16 %tmp
}

; fold (shlsat c1, c2) -> sat min
; -8 << 15 underflows i16, so the result saturates to INT16_MIN (0x8000).
define i16 @combine_shlsat_satmin(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shlsat_satmin:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #32768 // =0x8000
; CHECK-NEXT: ret
  %tmp = call i16 @llvm.sshl.sat.i16(i16 -8, i16 15)
  ret i16 %tmp
}

; External sink so the four folded lanes become observable call arguments.
declare void @sink4xi16(i16, i16, i16, i16)

; fold (shlsat c1, c2) -> c3 , c1/c2/c3 being vectors
; Each lane is folded independently: lane0 = 8<<2 = 32, lane1 = 8<<15
; saturates to 0x7fff, lane2 = -8<<2 = -32 (0xffe0), lane3 = -8<<15
; saturates to 0x8000.
define void @combine_shlsat_vector() nounwind {
; CHECK-LABEL: combine_shlsat_vector:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: mov w0, #32 // =0x20
; CHECK-NEXT: mov w1, #32767 // =0x7fff
; CHECK-NEXT: mov w2, #65504 // =0xffe0
; CHECK-NEXT: mov w3, #32768 // =0x8000
; CHECK-NEXT: bl sink4xi16
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %tmp = call <4 x i16> @llvm.sshl.sat.v4i16(
      <4 x i16><i16 8, i16 8, i16 -8, i16 -8>,
      <4 x i16><i16 2, i16 15, i16 2, i16 15>)
  ; Pass elements as arguments in a call to get CHECK statements that verify
  ; the constant folding.
  %e0 = extractelement <4 x i16> %tmp, i16 0
  %e1 = extractelement <4 x i16> %tmp, i16 1
  %e2 = extractelement <4 x i16> %tmp, i16 2
  %e3 = extractelement <4 x i16> %tmp, i16 3
  call void @sink4xi16(i16 %e0, i16 %e1, i16 %e2, i16 %e3)
  ret void
}

; Fold shlsat -> shl, if known not to saturate.
; After ashr by 2, a left shift by 2 cannot overflow i16, so the saturating
; shift becomes a plain shl; here (x >> 2) << 2 further simplifies to
; clearing the two low bits with a single and.
define i16 @combine_shlsat_to_shl(i16 %x) nounwind {
; CHECK-LABEL: combine_shlsat_to_shl:
; CHECK: // %bb.0:
; CHECK-NEXT: and w0, w0, #0xfffffffc
; CHECK-NEXT: ret
  %x2 = ashr i16 %x, 2
  %tmp = call i16 @llvm.sshl.sat.i16(i16 %x2, i16 2)
  ret i16 %tmp
}

; Do not fold shlsat -> shl.
; After ashr by 2, a left shift by 3 can still overflow i16, so the full
; saturating-shift expansion (overflow compare + csel of the saturated
; value) must be kept.
define i16 @combine_shlsat_to_shl_no_fold(i16 %x) nounwind {
; CHECK-LABEL: combine_shlsat_to_shl_no_fold:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: mov w9, #-65536 // =0xffff0000
; CHECK-NEXT: mov w10, #-2147483648 // =0x80000000
; CHECK-NEXT: ands w8, w9, w8, lsl #14
; CHECK-NEXT: cinv w10, w10, ge
; CHECK-NEXT: lsl w9, w8, #3
; CHECK-NEXT: cmp w8, w9, asr #3
; CHECK-NEXT: csel w8, w10, w9, ne
; CHECK-NEXT: asr w0, w8, #16
; CHECK-NEXT: ret
  %x2 = ashr i16 %x, 2
  %tmp = call i16 @llvm.sshl.sat.i16(i16 %x2, i16 3)
  ret i16 %tmp
}

; Fold shlsat -> shl, if known not to saturate.
; The sext from i8 guarantees 8 sign bits in each i16 lane, so shifting
; left by 7 cannot saturate and the vector shlsat becomes a plain shl
; (preceded by the shl/sshr pair that materializes the sign extension).
define <4 x i16> @combine_shlsat_to_shl_vec(<4 x i8> %a) nounwind {
; CHECK-LABEL: combine_shlsat_to_shl_vec:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.4h, v0.4h, #8
; CHECK-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-NEXT: shl v0.4h, v0.4h, #7
; CHECK-NEXT: ret
  %sext = sext <4 x i8> %a to <4 x i16>
  %tmp = call <4 x i16> @llvm.sshl.sat.v4i16(
      <4 x i16> %sext,
      <4 x i16> <i16 7, i16 7, i16 7, i16 7>)
  ret <4 x i16> %tmp
}