This PR depends on https://github.com/llvm/llvm-project/pull/90264.

In the current implementation, only the leaf children of each internal node in the suffix tree are included as candidates for outlining. However, all leaf descendants are outlining candidates, and the new implementation includes them. This behavior is controlled by the flag `outliner-leaf-descendants`, which defaults to true.

The reason for _putting this behind a flag_ is that the machine outliner is not the only pass that uses the suffix tree. The reason for _having it default to true_ is that including all leaf descendants shows a consistent size win.

* For Clang/LLD, it shows around a 3% reduction in text segment size when compared to the baseline `-Oz` linker binary.
* For selected benchmark tests in the LLVM test suite:

| run (CTMark/) | only leaf children | all leaf descendants | reduction % |
|------------------|--------------------|----------------------|-------------|
| lencod | 349624 | 348564 | -0.2004% |
| SPASS | 219672 | 218440 | -0.4738% |
| kc | 271956 | 250068 | -0.4506% |
| sqlite3 | 223920 | 222484 | -0.5471% |
| 7zip-benchmark | 405364 | 401244 | -0.3428% |
| bullet | 139820 | 138340 | -0.8315% |
| consumer-typeset | 295684 | 286628 | -1.2295% |
| pairlocalalign | 72236 | 71936 | -0.2164% |
| tramp3d-v4 | 189572 | 183676 | -2.9668% |

This is part of an enhanced version of the machine outliner -- see [RFC](https://discourse.llvm.org/t/rfc-enhanced-machine-outliner-part-1-fulllto-part-2-thinlto-nolto-to-come/78732).
102 lines
3.6 KiB
LLVM
102 lines
3.6 KiB
LLVM
; AArch64 machine-outliner codegen test, compiled twice with different
; frame-pointer policies. Both invocations enable the machine outliner and
; pass -outliner-leaf-descendants=false.
;   - check prefix NOOMIT covers the -frame-pointer=non-leaf invocation
;   - check prefix OMITFP covers the -frame-pointer=none invocation
; RUN: llc -verify-machineinstrs -enable-machine-outliner -outliner-leaf-descendants=false -mtriple=aarch64 -frame-pointer=non-leaf < %s | FileCheck %s --check-prefix=NOOMIT
; RUN: llc -verify-machineinstrs -enable-machine-outliner -outliner-leaf-descendants=false -mtriple=aarch64 -frame-pointer=none < %s | FileCheck %s --check-prefix=OMITFP
; Thin forwarder: hands its three i32 arguments unchanged to @_Z1hiii in a
; tail call and returns void. Under both check prefixes the entry block is
; expected to start with a direct branch `b _Z1hiii`, i.e. no frame setup
; precedes the jump.
define void @_Z1giii(i32 %x, i32 %y, i32 %z) minsize {
; NOOMIT-LABEL: _Z1giii:
; NOOMIT: // %bb.0: // %entry
; NOOMIT-NEXT: b _Z1hiii
;
; OMITFP-LABEL: _Z1giii:
; OMITFP: // %bb.0: // %entry
; OMITFP-NEXT: b _Z1hiii
entry:
  tail call void @_Z1hiii(i32 %x, i32 %y, i32 %z)
  ret void
}
; External callee reached by @_Z1giii's tail call; only declared here, so no
; body is provided in this file.
declare void @_Z1hiii(i32, i32, i32) minsize
; Calls @_Z1giii(1, 2, 3) twice back to back. Both calls are marked `tail`,
; but only the second one is immediately followed by `ret`.
;
; Expected lowering under both check prefixes:
;   1. spill the link register x30 (the NOOMIT prefix additionally expects x29
;      to be saved and set up as the frame pointer),
;   2. `bl OUTLINED_FUNCTION_0`,
;   3. reload the saved register(s),
;   4. leave through the branch `b _Z1giii`.
; The expected body of OUTLINED_FUNCTION_0 is checked at the end of the file.
define void @_Z2f1v() minsize {
; NOOMIT-LABEL: _Z2f1v:
; NOOMIT: // %bb.0: // %entry
; NOOMIT-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; NOOMIT-NEXT: mov x29, sp
; NOOMIT-NEXT: .cfi_def_cfa w29, 16
; NOOMIT-NEXT: .cfi_offset w30, -8
; NOOMIT-NEXT: .cfi_offset w29, -16
; NOOMIT-NEXT: bl OUTLINED_FUNCTION_0
; NOOMIT-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; NOOMIT-NEXT: b _Z1giii
;
; OMITFP-LABEL: _Z2f1v:
; OMITFP: // %bb.0: // %entry
; OMITFP-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; OMITFP-NEXT: .cfi_def_cfa_offset 16
; OMITFP-NEXT: .cfi_offset w30, -16
; OMITFP-NEXT: bl OUTLINED_FUNCTION_0
; OMITFP-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OMITFP-NEXT: b _Z1giii
entry:
  tail call void @_Z1giii(i32 1, i32 2, i32 3)
  tail call void @_Z1giii(i32 1, i32 2, i32 3)
  ret void
}
; Same IR body as @_Z2f1v: two consecutive `tail call`s of @_Z1giii(1, 2, 3)
; followed by `ret void`.
;
; The expected lowering is also the same under both check prefixes: save x30
; (plus an x29 frame record under the NOOMIT prefix), call the same
; OUTLINED_FUNCTION_0 with `bl`, restore, and finish with `b _Z1giii`.
define void @_Z2f2v() minsize {
; NOOMIT-LABEL: _Z2f2v:
; NOOMIT: // %bb.0: // %entry
; NOOMIT-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; NOOMIT-NEXT: mov x29, sp
; NOOMIT-NEXT: .cfi_def_cfa w29, 16
; NOOMIT-NEXT: .cfi_offset w30, -8
; NOOMIT-NEXT: .cfi_offset w29, -16
; NOOMIT-NEXT: bl OUTLINED_FUNCTION_0
; NOOMIT-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; NOOMIT-NEXT: b _Z1giii
;
; OMITFP-LABEL: _Z2f2v:
; OMITFP: // %bb.0: // %entry
; OMITFP-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; OMITFP-NEXT: .cfi_def_cfa_offset 16
; OMITFP-NEXT: .cfi_offset w30, -16
; OMITFP-NEXT: bl OUTLINED_FUNCTION_0
; OMITFP-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OMITFP-NEXT: b _Z1giii
entry:
  tail call void @_Z1giii(i32 1, i32 2, i32 3)
  tail call void @_Z1giii(i32 1, i32 2, i32 3)
  ret void
}
; Expected body of the outlined function for the -frame-pointer=none
; invocation (check prefix OMITFP): spill x30, materialize w0..w2 = 1, 2, 3
; and `bl _Z1giii`, materialize the same three arguments again, then reload
; x30 and `ret`. The closing branch `b _Z1giii` is expected in the callers,
; not in the outlined function.
; OMITFP-LABEL: OUTLINED_FUNCTION_0:
; OMITFP: .cfi_startproc
; OMITFP-NEXT: // %bb.0:
; OMITFP-NEXT: .cfi_def_cfa_offset 16
; OMITFP-NEXT: .cfi_offset w30, -16
; OMITFP-NEXT: str x30, [sp, #-16]!
; OMITFP-NEXT: mov w0, #1
; OMITFP-NEXT: mov w1, #2
; OMITFP-NEXT: mov w2, #3
; OMITFP-NEXT: bl _Z1giii
; OMITFP-NEXT: mov w0, #1
; OMITFP-NEXT: mov w1, #2
; OMITFP-NEXT: mov w2, #3
; OMITFP-NEXT: ldr x30, [sp], #16
; OMITFP-NEXT: ret
; Expected body of the outlined function for the -frame-pointer=non-leaf
; invocation (check prefix NOOMIT). The instruction sequence matches the one
; checked under the OMITFP prefix: only x30 is spilled and reloaded around the
; inner `bl _Z1giii`, and no x29 frame record is expected here.
; NOOMIT-LABEL: OUTLINED_FUNCTION_0:
; NOOMIT: .cfi_startproc
; NOOMIT-NEXT: // %bb.0:
; NOOMIT-NEXT: .cfi_def_cfa_offset 16
; NOOMIT-NEXT: .cfi_offset w30, -16
; NOOMIT-NEXT: str x30, [sp, #-16]!
; NOOMIT-NEXT: mov w0, #1
; NOOMIT-NEXT: mov w1, #2
; NOOMIT-NEXT: mov w2, #3
; NOOMIT-NEXT: bl _Z1giii
; NOOMIT-NEXT: mov w0, #1
; NOOMIT-NEXT: mov w1, #2
; NOOMIT-NEXT: mov w2, #3
; NOOMIT-NEXT: ldr x30, [sp], #16
; NOOMIT-NEXT: ret