Files
clang-p2996/llvm/test/CodeGen/X86/hipe-cc64.ll
Philip Reames 86eff6be68 [MachineCombiner] Use default latency model when no detailed model available
This change adjusts the cost modeling used when the target does not have a schedule model with individual instruction latencies. After this change, we use the default latency information available from TargetSchedule. The default latency information essentially ends up treating most instructions as latency 1, with a few "expensive" ones getting a higher cost.

Previously, we unconditionally applied the first legal pattern - without any consideration of profitability. As a result, this change both prevents some patterns being applied, and changes which patterns are exercised. (i.e. previously the first pattern was applied, afterwards, maybe the second one is because the first wasn't profitable.)

The motivation here is two fold.

First, this brings the default behavior in line with the behavior when -mcpu or -mtune is specified. This improves test coverage, and generally makes it less likely we will have bad surprises when providing more information to the compiler.

Second, this enables some reassociation for ILP by default. Despite being unconditionally enabled, the prior code tended to "reassociate" repeatedly through an entire chain and simply moving the first operand to the end. The result was still a serial chain, just a different one. With this change, one of the intermediate transforms is unprofitable and we end up with a partially flattened tree.

Note that the resulting code diffs show significant room for improvement in the basic algorithm. I am intentionally excluding those from this patch.

For the test diffs, I don't seen any concerning regressions. I took a fairly close look at the RISCV ones, but only skimmed the x86 (particularly vector x86) changes.

Differential Revision: https://reviews.llvm.org/D141017
2023-01-20 09:28:20 -08:00

136 lines
4.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -stack-symbol-ordering=0 -tailcallopt -relocation-model=static -code-model=medium -mtriple=x86_64-linux-gnu -mcpu=opteron | FileCheck %s
; Check the HiPE calling convention works (x86-64)
define void @zap(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: zap:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %r13
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rsi, %rdx
; CHECK-NEXT: movl $8, %ecx
; CHECK-NEXT: movl $9, %r8d
; CHECK-NEXT: movq %rdi, %rsi
; CHECK-NEXT: callq addfour@PLT
; CHECK-NEXT: movl $1, %edx
; CHECK-NEXT: movl $2, %ecx
; CHECK-NEXT: movl $3, %r8d
; CHECK-NEXT: movq %rax, %r9
; CHECK-NEXT: callq foo@PLT
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r13
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
entry:
%0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9)
%res = extractvalue {i64, i64, i64} %0, 2
tail call void @foo(i64 undef, i64 undef, i64 1, i64 2, i64 3, i64 %res) nounwind
ret void
}
define cc 11 {i64, i64, i64} @addfour(i64 %hp, i64 %p, i64 %x, i64 %y, i64 %z, i64 %w) nounwind {
; CHECK-LABEL: addfour:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addq %rsi, %rdx
; CHECK-NEXT: leaq (%rcx,%r8), %rax
; CHECK-NEXT: addq %rdx, %rax
; CHECK-NEXT: retq
entry:
%0 = add i64 %x, %y
%1 = add i64 %0, %z
%2 = add i64 %1, %w
%res = insertvalue {i64, i64, i64} undef, i64 %2, 2
ret {i64, i64, i64} %res
}
define cc 11 void @foo(i64 %hp, i64 %p, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) nounwind {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $48, %rsp
; CHECK-NEXT: movq %r15, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rbp, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %r8, (%rsp)
; CHECK-NEXT: addq $48, %rsp
; CHECK-NEXT: jmp bar@PLT # TAILCALL
entry:
%hp_var = alloca i64
%p_var = alloca i64
%arg0_var = alloca i64
%arg1_var = alloca i64
%arg2_var = alloca i64
%arg3_var = alloca i64
store i64 %hp, ptr %hp_var
store i64 %p, ptr %p_var
store i64 %arg0, ptr %arg0_var
store i64 %arg1, ptr %arg1_var
store i64 %arg2, ptr %arg2_var
store i64 %arg3, ptr %arg3_var
; Loads are reading values just writen from corresponding register and are therefore noops.
%0 = load i64, ptr %hp_var
%1 = load i64, ptr %p_var
%2 = load i64, ptr %arg0_var
%3 = load i64, ptr %arg1_var
%4 = load i64, ptr %arg2_var
%5 = load i64, ptr %arg3_var
tail call cc 11 void @bar(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) nounwind
ret void
}
define cc 11 void @baz() nounwind {
; CHECK-LABEL: baz:
; CHECK: # %bb.0:
; CHECK-NEXT: movq clos@GOTPCREL(%rip), %rax
; CHECK-NEXT: movl $42, %esi
; CHECK-NEXT: jmpq *(%rax) # TAILCALL
%tmp_clos = load i64, ptr @clos
%tmp_clos2 = inttoptr i64 %tmp_clos to ptr
tail call cc 11 void %tmp_clos2(i64 undef, i64 undef, i64 42) nounwind
ret void
}
; Sanity-check the tail call sequence. Number of arguments was chosen as to
; expose a bug where the tail call sequence clobbered the stack.
define cc 11 { i64, i64, i64 } @tailcaller(i64 %hp, i64 %p) #0 {
; CHECK-LABEL: tailcaller:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $16, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $79, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movl $15, %esi
; CHECK-NEXT: movl $31, %edx
; CHECK-NEXT: movl $47, %ecx
; CHECK-NEXT: movl $63, %r8d
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: jmp tailcallee@PLT # TAILCALL
%ret = tail call cc11 { i64, i64, i64 } @tailcallee(i64 %hp, i64 %p, i64 15,
i64 31, i64 47, i64 63, i64 79) #1
ret { i64, i64, i64 } %ret
}
!hipe.literals = !{ !0, !1, !2 }
!0 = !{ !"P_NSP_LIMIT", i32 160 }
!1 = !{ !"X86_LEAF_WORDS", i32 24 }
!2 = !{ !"AMD64_LEAF_WORDS", i32 24 }
@clos = external constant i64
declare cc 11 void @bar(i64, i64, i64, i64, i64, i64)
declare cc 11 { i64, i64, i64 } @tailcallee(i64, i64, i64, i64, i64, i64, i64)
!llvm.module.flags = !{!3}
!3 = !{i32 2, !"override-stack-alignment", i32 8}