[AArch64] Change IssueWidth to 5 in AArch64SchedNeoverseN2.td (#145717)

The IssueWidth for Neoverse-N2 CPUs is currently set to 10, which is too
high and does not properly reflect the core's dispatch constraints.

I benchmarked several values of IssueWidth (10, 8, 6, 5, 4) across a
range of workloads on a Neoverse-N2 machine, and an issue width of 5
gave the highest overall geomean score.

If this patch were to cause any major regression post-commit, it could
be easily reverted, but it is likely to show an overall improvement.

Related Neoverse-V2 PR: https://github.com/llvm/llvm-project/pull/142565
This commit is contained in:
Simon Wallis
2025-06-30 17:12:02 +01:00
committed by GitHub
parent 18a0675082
commit a1d83311c8
4 changed files with 1996 additions and 1992 deletions

View File

@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
def NeoverseN2Model : SchedMachineModel {
-let IssueWidth = 10; // Micro-ops dispatched at a time.
+let IssueWidth = 5; // Micro-ops dispatched at a time.
let MicroOpBufferSize = 160; // Entries in micro-op re-order buffer.
let LoadLatency = 4; // Optimistic load latency.
let MispredictPenalty = 10; // Extra cycles for mispredicted branch.

View File

@@ -262,8 +262,8 @@ define half @reassociate_adds_half(half %x0, half %x1, half %x2, half %x3) {
; CHECK-UNSAFE-LABEL: reassociate_adds_half:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
-; CHECK-UNSAFE-NEXT: fadd h2, h3, h2
-; CHECK-UNSAFE-NEXT: fadd h0, h2, h0
+; CHECK-UNSAFE-NEXT: fadd h1, h3, h2
+; CHECK-UNSAFE-NEXT: fadd h0, h1, h0
; CHECK-UNSAFE-NEXT: ret
%t0 = fdiv half %x0, %x1
%t1 = fadd half %x2, %t0
@@ -284,8 +284,8 @@ define half @reassociate_muls_half(half %x0, half %x1, half %x2, half %x3) {
; CHECK-UNSAFE-LABEL: reassociate_muls_half:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
-; CHECK-UNSAFE-NEXT: fmul h2, h3, h2
-; CHECK-UNSAFE-NEXT: fmul h0, h2, h0
+; CHECK-UNSAFE-NEXT: fmul h1, h3, h2
+; CHECK-UNSAFE-NEXT: fmul h0, h1, h0
; CHECK-UNSAFE-NEXT: ret
%t0 = fdiv half %x0, %x1
%t1 = fmul half %x2, %t0

View File

@@ -5066,19 +5066,19 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 2 1.00 movs p0.b, p0/z, p0.b
# CHECK-NEXT: 2 2 1.00 movs p15.b, p15.b
# CHECK-NEXT: 2 2 1.00 movs p15.b, p15/z, p15.b
-# CHECK-NEXT: 1 1 0.10 U mrs x3, ID_AA64ZFR0_EL1
-# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL1
-# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL12
-# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL2
-# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL3
+# CHECK-NEXT: 1 1 0.20 U mrs x3, ID_AA64ZFR0_EL1
+# CHECK-NEXT: 1 1 0.20 U mrs x3, ZCR_EL1
+# CHECK-NEXT: 1 1 0.20 U mrs x3, ZCR_EL12
+# CHECK-NEXT: 1 1 0.20 U mrs x3, ZCR_EL2
+# CHECK-NEXT: 1 1 0.20 U mrs x3, ZCR_EL3
# CHECK-NEXT: 1 4 1.00 msb z0.b, p7/m, z1.b, z31.b
# CHECK-NEXT: 2 5 2.00 msb z0.d, p7/m, z1.d, z31.d
# CHECK-NEXT: 1 4 1.00 msb z0.h, p7/m, z1.h, z31.h
# CHECK-NEXT: 1 4 1.00 msb z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: 1 1 0.10 U msr ZCR_EL1, x3
-# CHECK-NEXT: 1 1 0.10 U msr ZCR_EL12, x3
-# CHECK-NEXT: 1 1 0.10 U msr ZCR_EL2, x3
-# CHECK-NEXT: 1 1 0.10 U msr ZCR_EL3, x3
+# CHECK-NEXT: 1 1 0.20 U msr ZCR_EL1, x3
+# CHECK-NEXT: 1 1 0.20 U msr ZCR_EL12, x3
+# CHECK-NEXT: 1 1 0.20 U msr ZCR_EL2, x3
+# CHECK-NEXT: 1 1 0.20 U msr ZCR_EL3, x3
# CHECK-NEXT: 1 4 1.00 mul z0.b, p7/m, z0.b, z31.b
# CHECK-NEXT: 1 4 1.00 mul z0.b, z1.b, z2.b
# CHECK-NEXT: 2 5 2.00 mul z0.d, p7/m, z0.d, z31.d

File diff suppressed because it is too large Load Diff