This change adjusts the cost modeling used when the target does not have a schedule model with individual instruction latencies. After this change, we use the default latency information available from TargetSchedule. The default latency information essentially ends up treating most instructions as latency 1, with a few "expensive" ones getting a higher cost. Previously, we unconditionally applied the first legal pattern, without any consideration of profitability. As a result, this change both prevents some patterns from being applied and changes which patterns are exercised (i.e., previously the first pattern was always applied; afterwards, the second one may be applied instead because the first was not profitable). The motivation here is twofold. First, this brings the default behavior in line with the behavior when -mcpu or -mtune is specified. This improves test coverage, and generally makes it less likely we will have bad surprises when providing more information to the compiler. Second, this enables some reassociation for ILP by default. Despite being unconditionally enabled, the prior code tended to "reassociate" repeatedly through an entire chain and simply move the first operand to the end. The result was still a serial chain, just a different one. With this change, one of the intermediate transforms is unprofitable and we end up with a partially flattened tree. Note that the resulting code diffs show significant room for improvement in the basic algorithm. I am intentionally excluding those improvements from this patch. For the test diffs, I don't see any concerning regressions. I took a fairly close look at the RISCV ones, but only skimmed the x86 (particularly vector x86) changes. Differential Revision: https://reviews.llvm.org/D141017
217 lines
8.6 KiB
LLVM
217 lines
8.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp
|
|
; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X64 %s
|
|
; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86 %s
|
|
; RUN: llc -mtriple=x86_64-linux-gnux32 < %s | FileCheck -check-prefix=CHECK-X32 %s
|
|
|
|
; Test function @foo.
; Reserves an 18000-element i32 array (align 16) on the stack, performs
; volatile stores of 1 to elements 98 and 7198, and returns a volatile load of
; element 0. Attribute group #0 gives it "probe-stack"="inline-asm".
;
; The expectations below cover the three llc invocations listed at the top of
; the file (x86_64 Android, i686 Android, x86_64 gnux32). In each of them the
; frame is set up by a loop that lowers the stack pointer 4096 bytes at a time
; and stores 0 at the new top of stack, until 69632 bytes (17 * 4096) have been
; covered; a single subtraction then allocates the remainder (2248 or 2380).
;
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define i32 @foo() local_unnamed_addr #0 {
|
|
; CHECK-X64-LABEL: foo:
|
|
; CHECK-X64: # %bb.0:
|
|
; CHECK-X64-NEXT: movq %rsp, %r11
|
|
; CHECK-X64-NEXT: subq $69632, %r11 # imm = 0x11000
|
|
; CHECK-X64-NEXT: .cfi_def_cfa_register %r11
|
|
; CHECK-X64-NEXT: .cfi_adjust_cfa_offset 69632
|
|
; CHECK-X64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
|
|
; CHECK-X64-NEXT: subq $4096, %rsp # imm = 0x1000
|
|
; CHECK-X64-NEXT: movq $0, (%rsp)
|
|
; CHECK-X64-NEXT: cmpq %r11, %rsp
|
|
; CHECK-X64-NEXT: jne .LBB0_1
|
|
; CHECK-X64-NEXT: # %bb.2:
|
|
; CHECK-X64-NEXT: subq $2248, %rsp # imm = 0x8C8
|
|
; CHECK-X64-NEXT: .cfi_def_cfa_register %rsp
|
|
; CHECK-X64-NEXT: .cfi_def_cfa_offset 71888
|
|
; CHECK-X64-NEXT: movl $1, 264(%rsp)
|
|
; CHECK-X64-NEXT: movl $1, 28664(%rsp)
|
|
; CHECK-X64-NEXT: movl -128(%rsp), %eax
|
|
; CHECK-X64-NEXT: addq $71880, %rsp # imm = 0x118C8
|
|
; CHECK-X64-NEXT: .cfi_def_cfa_offset 8
|
|
; CHECK-X64-NEXT: retq
|
|
;
|
|
; CHECK-X86-LABEL: foo:
|
|
; CHECK-X86: # %bb.0:
|
|
; CHECK-X86-NEXT: movl %esp, %eax
|
|
; CHECK-X86-NEXT: subl $69632, %eax # imm = 0x11000
|
|
; CHECK-X86-NEXT: .cfi_def_cfa_register %eax
|
|
; CHECK-X86-NEXT: .cfi_adjust_cfa_offset 69632
|
|
; CHECK-X86-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
|
|
; CHECK-X86-NEXT: subl $4096, %esp # imm = 0x1000
|
|
; CHECK-X86-NEXT: movl $0, (%esp)
|
|
; CHECK-X86-NEXT: cmpl %eax, %esp
|
|
; CHECK-X86-NEXT: jne .LBB0_1
|
|
; CHECK-X86-NEXT: # %bb.2:
|
|
; CHECK-X86-NEXT: subl $2380, %esp # imm = 0x94C
|
|
; CHECK-X86-NEXT: .cfi_def_cfa_register %esp
|
|
; CHECK-X86-NEXT: .cfi_def_cfa_offset 72016
|
|
; CHECK-X86-NEXT: movl $1, 392(%esp)
|
|
; CHECK-X86-NEXT: movl $1, 28792(%esp)
|
|
; CHECK-X86-NEXT: movl (%esp), %eax
|
|
; CHECK-X86-NEXT: addl $72012, %esp # imm = 0x1194C
|
|
; CHECK-X86-NEXT: .cfi_def_cfa_offset 4
|
|
; CHECK-X86-NEXT: retl
|
|
;
|
|
; CHECK-X32-LABEL: foo:
|
|
; CHECK-X32: # %bb.0:
|
|
; CHECK-X32-NEXT: movl %esp, %r11d
|
|
; CHECK-X32-NEXT: subl $69632, %r11d # imm = 0x11000
|
|
; CHECK-X32-NEXT: .cfi_def_cfa_register %r11
|
|
; CHECK-X32-NEXT: .cfi_adjust_cfa_offset 69632
|
|
; CHECK-X32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
|
|
; CHECK-X32-NEXT: subl $4096, %esp # imm = 0x1000
|
|
; CHECK-X32-NEXT: movq $0, (%esp)
|
|
; CHECK-X32-NEXT: cmpl %r11d, %esp
|
|
; CHECK-X32-NEXT: jne .LBB0_1
|
|
; CHECK-X32-NEXT: # %bb.2:
|
|
; CHECK-X32-NEXT: subl $2248, %esp # imm = 0x8C8
|
|
; CHECK-X32-NEXT: .cfi_def_cfa_register %rsp
|
|
; CHECK-X32-NEXT: .cfi_def_cfa_offset 71888
|
|
; CHECK-X32-NEXT: movl $1, 264(%esp)
|
|
; CHECK-X32-NEXT: movl $1, 28664(%esp)
|
|
; CHECK-X32-NEXT: movl -128(%esp), %eax
|
|
; CHECK-X32-NEXT: addl $71880, %esp # imm = 0x118C8
|
|
; CHECK-X32-NEXT: .cfi_def_cfa_offset 8
|
|
; CHECK-X32-NEXT: retq
|
|
; IR under test. The two stores land at byte offsets 98 * 4 = 392 and
; 7198 * 4 = 28792 from the array base. The i686 expectations use exactly
; those offsets from %esp; the x86-64 and x32 expectations use offsets 128
; bytes lower (264 and 28664) and load element 0 from -128 off the stack
; pointer.
; NOTE(review): presumably the array extends into the 128-byte red zone on
; the 64-bit targets - confirm.
%a = alloca i32, i64 18000, align 16
|
|
%b0 = getelementptr inbounds i32, ptr %a, i64 98
|
|
%b1 = getelementptr inbounds i32, ptr %a, i64 7198
|
|
; Volatile accesses, so the stores and the load stay in the generated code.
store volatile i32 1, ptr %b0
|
|
store volatile i32 1, ptr %b1
|
|
%c = load volatile i32, ptr %a
|
|
ret i32 %c
|
|
}
|
|
|
|
; Test function @push_before_probe.
; Takes eight i32 arguments, adds them together, and does volatile stores of
; the sum to elements 98 and 7198 of an 18000-element i32 stack array
; (align 16). The attributes "probe-stack"="inline-asm" and
; "no_caller_saved_registers" are attached directly to the definition.
;
; In the expectations for all three targets, the register pushes (pushq %rax
; on the 64-bit targets; pushl of %esi, %edx, %ecx and %eax on i686) come
; before the stack-probe loop, and the matching pops come after the final
; stack-pointer adjustment. The probe loop itself has the same shape as in
; @foo: 4096-byte steps with a store of 0 until 69632 bytes are covered, then
; one subtraction for the remainder (2240 or 2380).
;
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define void @push_before_probe(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) "probe-stack"="inline-asm" "no_caller_saved_registers" {
|
|
; CHECK-X64-LABEL: push_before_probe:
|
|
; CHECK-X64: # %bb.0:
|
|
; CHECK-X64-NEXT: pushq %rax
|
|
; CHECK-X64-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-X64-NEXT: movq %rsp, %r11
|
|
; CHECK-X64-NEXT: subq $69632, %r11 # imm = 0x11000
|
|
; CHECK-X64-NEXT: .cfi_def_cfa_register %r11
|
|
; CHECK-X64-NEXT: .cfi_adjust_cfa_offset 69632
|
|
; CHECK-X64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
|
|
; CHECK-X64-NEXT: subq $4096, %rsp # imm = 0x1000
|
|
; CHECK-X64-NEXT: movq $0, (%rsp)
|
|
; CHECK-X64-NEXT: cmpq %r11, %rsp
|
|
; CHECK-X64-NEXT: jne .LBB1_1
|
|
; CHECK-X64-NEXT: # %bb.2:
|
|
; CHECK-X64-NEXT: subq $2240, %rsp # imm = 0x8C0
|
|
; CHECK-X64-NEXT: .cfi_def_cfa_register %rsp
|
|
; CHECK-X64-NEXT: .cfi_def_cfa_offset 71888
|
|
; CHECK-X64-NEXT: .cfi_offset %rax, -16
|
|
; CHECK-X64-NEXT: movl 71888(%rsp), %eax
|
|
; CHECK-X64-NEXT: addl %esi, %edi
|
|
; CHECK-X64-NEXT: addl %ecx, %edx
|
|
; CHECK-X64-NEXT: addl %edi, %edx
|
|
; CHECK-X64-NEXT: addl %r9d, %r8d
|
|
; CHECK-X64-NEXT: addl 71896(%rsp), %eax
|
|
; CHECK-X64-NEXT: addl %r8d, %eax
|
|
; CHECK-X64-NEXT: addl %edx, %eax
|
|
; CHECK-X64-NEXT: movl %eax, 264(%rsp)
|
|
; CHECK-X64-NEXT: movl %eax, 28664(%rsp)
|
|
; CHECK-X64-NEXT: addq $71872, %rsp # imm = 0x118C0
|
|
; CHECK-X64-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-X64-NEXT: popq %rax
|
|
; CHECK-X64-NEXT: .cfi_def_cfa_offset 8
|
|
; CHECK-X64-NEXT: retq
|
|
;
|
|
; CHECK-X86-LABEL: push_before_probe:
|
|
; CHECK-X86: # %bb.0:
|
|
; CHECK-X86-NEXT: pushl %esi
|
|
; CHECK-X86-NEXT: .cfi_def_cfa_offset 8
|
|
; CHECK-X86-NEXT: pushl %edx
|
|
; CHECK-X86-NEXT: .cfi_def_cfa_offset 12
|
|
; CHECK-X86-NEXT: pushl %ecx
|
|
; CHECK-X86-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-X86-NEXT: pushl %eax
|
|
; CHECK-X86-NEXT: .cfi_def_cfa_offset 20
|
|
; CHECK-X86-NEXT: movl %esp, %eax
|
|
; CHECK-X86-NEXT: subl $69632, %eax # imm = 0x11000
|
|
; CHECK-X86-NEXT: .cfi_def_cfa_register %eax
|
|
; CHECK-X86-NEXT: .cfi_adjust_cfa_offset 69632
|
|
; CHECK-X86-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
|
|
; CHECK-X86-NEXT: subl $4096, %esp # imm = 0x1000
|
|
; CHECK-X86-NEXT: movl $0, (%esp)
|
|
; CHECK-X86-NEXT: cmpl %eax, %esp
|
|
; CHECK-X86-NEXT: jne .LBB1_1
|
|
; CHECK-X86-NEXT: # %bb.2:
|
|
; CHECK-X86-NEXT: subl $2380, %esp # imm = 0x94C
|
|
; CHECK-X86-NEXT: .cfi_def_cfa_register %esp
|
|
; CHECK-X86-NEXT: .cfi_def_cfa_offset 72032
|
|
; CHECK-X86-NEXT: .cfi_offset %eax, -20
|
|
; CHECK-X86-NEXT: .cfi_offset %ecx, -16
|
|
; CHECK-X86-NEXT: .cfi_offset %edx, -12
|
|
; CHECK-X86-NEXT: .cfi_offset %esi, -8
|
|
; CHECK-X86-NEXT: movl 72056(%esp), %eax
|
|
; CHECK-X86-NEXT: movl 72048(%esp), %edx
|
|
; CHECK-X86-NEXT: movl 72040(%esp), %ecx
|
|
; CHECK-X86-NEXT: movl 72032(%esp), %esi
|
|
; CHECK-X86-NEXT: addl 72036(%esp), %esi
|
|
; CHECK-X86-NEXT: addl 72044(%esp), %ecx
|
|
; CHECK-X86-NEXT: addl %esi, %ecx
|
|
; CHECK-X86-NEXT: addl 72052(%esp), %edx
|
|
; CHECK-X86-NEXT: addl 72060(%esp), %eax
|
|
; CHECK-X86-NEXT: addl %edx, %eax
|
|
; CHECK-X86-NEXT: addl %ecx, %eax
|
|
; CHECK-X86-NEXT: movl %eax, 392(%esp)
|
|
; CHECK-X86-NEXT: movl %eax, 28792(%esp)
|
|
; CHECK-X86-NEXT: addl $72012, %esp # imm = 0x1194C
|
|
; CHECK-X86-NEXT: .cfi_def_cfa_offset 20
|
|
; CHECK-X86-NEXT: popl %eax
|
|
; CHECK-X86-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-X86-NEXT: popl %ecx
|
|
; CHECK-X86-NEXT: .cfi_def_cfa_offset 12
|
|
; CHECK-X86-NEXT: popl %edx
|
|
; CHECK-X86-NEXT: .cfi_def_cfa_offset 8
|
|
; CHECK-X86-NEXT: popl %esi
|
|
; CHECK-X86-NEXT: .cfi_def_cfa_offset 4
|
|
; CHECK-X86-NEXT: retl
|
|
;
|
|
; CHECK-X32-LABEL: push_before_probe:
|
|
; CHECK-X32: # %bb.0:
|
|
; CHECK-X32-NEXT: pushq %rax
|
|
; CHECK-X32-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-X32-NEXT: movl %esp, %r11d
|
|
; CHECK-X32-NEXT: subl $69632, %r11d # imm = 0x11000
|
|
; CHECK-X32-NEXT: .cfi_def_cfa_register %r11
|
|
; CHECK-X32-NEXT: .cfi_adjust_cfa_offset 69632
|
|
; CHECK-X32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
|
|
; CHECK-X32-NEXT: subl $4096, %esp # imm = 0x1000
|
|
; CHECK-X32-NEXT: movq $0, (%esp)
|
|
; CHECK-X32-NEXT: cmpl %r11d, %esp
|
|
; CHECK-X32-NEXT: jne .LBB1_1
|
|
; CHECK-X32-NEXT: # %bb.2:
|
|
; CHECK-X32-NEXT: subl $2240, %esp # imm = 0x8C0
|
|
; CHECK-X32-NEXT: .cfi_def_cfa_register %rsp
|
|
; CHECK-X32-NEXT: .cfi_def_cfa_offset 71888
|
|
; CHECK-X32-NEXT: .cfi_offset %rax, -16
|
|
; CHECK-X32-NEXT: movl 71888(%esp), %eax
|
|
; CHECK-X32-NEXT: addl %esi, %edi
|
|
; CHECK-X32-NEXT: addl %ecx, %edx
|
|
; CHECK-X32-NEXT: addl %edi, %edx
|
|
; CHECK-X32-NEXT: addl %r9d, %r8d
|
|
; CHECK-X32-NEXT: addl 71896(%esp), %eax
|
|
; CHECK-X32-NEXT: addl %r8d, %eax
|
|
; CHECK-X32-NEXT: addl %edx, %eax
|
|
; CHECK-X32-NEXT: movl %eax, 264(%esp)
|
|
; CHECK-X32-NEXT: movl %eax, 28664(%esp)
|
|
; CHECK-X32-NEXT: addl $71872, %esp # imm = 0x118C0
|
|
; CHECK-X32-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-X32-NEXT: popq %rax
|
|
; CHECK-X32-NEXT: .cfi_def_cfa_offset 8
|
|
; CHECK-X32-NEXT: retq
|
|
; IR under test: an 18000-element i32 stack array and pointers to its
; elements 98 and 7198 (byte offsets 392 and 28792).
%all = alloca i32, i64 18000, align 16
|
|
%b0 = getelementptr inbounds i32, ptr %all, i64 98
|
|
%b1 = getelementptr inbounds i32, ptr %all, i64 7198
|
|
; The eight arguments are summed as a balanced tree: four pairwise sums, then
; two sums of pairs, then the total.
; NOTE(review): the order of the expected add instructions above depends on
; how the backend reassociates this tree - regenerate the checks if it changes.
%ab = add i32 %a, %b
|
|
%cd = add i32 %c, %d
|
|
%ef = add i32 %e, %f
|
|
%gh = add i32 %g, %h
|
|
%abcd = add i32 %ab, %cd
|
|
%efgh = add i32 %ef, %gh
|
|
%sum = add i32 %abcd, %efgh
|
|
; Volatile stores of the sum, so both writes stay in the generated code.
store volatile i32 %sum, ptr %b0
|
|
store volatile i32 %sum, ptr %b1
|
|
ret void
|
|
}
|
|
|
|
; Attribute group #0, referenced by @foo: sets the "probe-stack" function
; attribute to "inline-asm".
; NOTE(review): judging by the expected output for @foo, this makes the
; backend emit the stack-probe loop inline; see the LLVM Language Reference
; for the attribute's exact definition.
attributes #0 = {"probe-stack"="inline-asm"}
|