Files
clang-p2996/llvm/test/CodeGen/AArch64/machine-outliner.mir
Xuan Zhang d9a00ed366 [MachineOutliner] Leaf Descendants (#90275)
This PR  depends on https://github.com/llvm/llvm-project/pull/90264

In the current implementation, only leaf children of each internal node
in the suffix tree are included as candidates for outlining. But all
leaf descendants are outlining candidates, which we include in the new
implementation. This is controlled by the flag `outliner-leaf-descendants`,
which defaults to true.

The reason for _putting this behind a flag_ is that the machine outliner is
not the only pass that uses the suffix tree.

The reason for _having this default to true_ is that including all
leaf descendants shows a consistent size win.
* For Clang/LLD, it shows around 3% reduction in text segment size when
compared to the baseline `-Oz` linker binary.
 * For selected benchmark tests in the LLVM test suite:
 
| run (CTMark/) | only leaf children | all leaf descendants | reduction % |
|------------------|--------------------|----------------------|-------------|
| lencod | 349624 | 348564 | -0.2004% |
| SPASS | 219672 | 218440 | -0.4738% |
| kc | 271956 | 250068 | -0.4506% |
| sqlite3 | 223920 | 222484 | -0.5471% |
| 7zip-benchmark | 405364 | 401244 | -0.3428% |
| bullet | 139820 | 138340 | -0.8315% |
| consumer-typeset | 295684 | 286628 | -1.2295% |
| pairlocalalign | 72236 | 71936 | -0.2164% |
| tramp3d-v4 | 189572 | 183676 | -2.9668% |

This is part of an enhanced version of machine outliner -- see
[RFC](https://discourse.llvm.org/t/rfc-enhanced-machine-outliner-part-1-fulllto-part-2-thinlto-nolto-to-come/78732).
2024-06-18 07:13:05 -07:00

170 lines
4.5 KiB
YAML

# RUN: llc -mtriple=aarch64--- -run-pass=prologepilog -run-pass=machine-outliner -verify-machineinstrs -frame-pointer=non-leaf -outliner-leaf-descendants=false %s -o - | FileCheck %s
#
# The command above runs the prologepilog and machine-outliner passes on this
# file with -outliner-leaf-descendants=false and pipes the printed result to
# FileCheck, which verifies it against the check comments in this file.
#
# Embedded LLVM IR module: a common global @x plus empty definitions of baz,
# main and bar, all sharing attribute group #0 (noinline noredzone). The
# machine-level bodies of these functions are supplied by the MIR documents
# that follow this one.
--- |
@x = common global i32 0, align 4
define void @baz() #0 {
ret void
}
define i32 @main() #0 {
ret i32 0
}
define void @bar(i32 %a) #0 {
ret void
}
attributes #0 = { noinline noredzone }
...
---
# This test ensures that we
# - Create outlined functions
# - Don't outline anything to do with LR or W30
# - Save LR when it's not available
# - Functions whose addresses are taken can still be outlined
#
# Input layout: bb.0 materializes the page address of @bar with ADRP. Blocks
# bb.1, bb.2 and bb.3 each begin with the same eleven instructions (an LDPXi,
# six identical ORRWri writes to $w12, an ADRP of @x, an ADDXri, then a
# STRHHroW and a write to $lr); bb.3 additionally restores $sp afterwards.
# Every block lists $lr as live-in.
#
# Expected output, per block: a call to one shared outlined function, then a
# copy from an X register into $lr, then the STRHHroW (which reads $w30) and
# the write to $lr, both still present in main.
# NOTE(review): the copy into $lr is presumably the restore half of a
# register-based LR save around the call; the save itself is not checked
# here -- confirm against the pass output.
#
# CHECK-LABEL: main
# CHECK-LABEL: bb.1:
# CHECK-DAG: BL @OUTLINED_FUNCTION_[[F0:[0-9]+]]
# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG:[0-9]+]], 0
# CHECK-NEXT: STRHHroW $w12, $x9, $w30, 1, 1
# CHECK-NEXT: $lr = ORRXri $xzr, 1
# CHECK-DAG: bb.2
# CHECK: BL @OUTLINED_FUNCTION_[[F0]]
# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG]], 0
# CHECK-NEXT: STRHHroW $w12, $x9, $w30, 1, 1
# CHECK-NEXT: $lr = ORRXri $xzr, 1
# CHECK-DAG: bb.3
# CHECK: BL @OUTLINED_FUNCTION_[[F0]]
# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG]], 0
# CHECK-NEXT: STRHHroW $w12, $x9, $w30, 1, 1
# CHECK-NEXT: $lr = ORRXri $xzr, 1
name: main
tracksRegLiveness: true
body: |
bb.0:
liveins: $lr
$sp = frame-setup SUBXri $sp, 16, 0
renamable $x9 = ADRP target-flags(aarch64-page) @bar
$x9 = ORRXri $xzr, 1
$w12 = ORRWri $wzr, 1
$w30 = ORRWri $wzr, 1
$lr = ORRXri $xzr, 1
bb.1:
liveins: $lr
$x20, $x19 = LDPXi $sp, 10
$w12 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 1
renamable $x9 = ADRP target-flags(aarch64-page) @x
$x12 = ADDXri $sp, 48, 0;
STRHHroW $w12, $x9, $w30, 1, 1
$lr = ORRXri $xzr, 1
bb.2:
liveins: $lr
$x20, $x19 = LDPXi $sp, 10
$w12 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 1
renamable $x9 = ADRP target-flags(aarch64-page) @x
$x12 = ADDXri $sp, 48, 0;
STRHHroW $w12, $x9, $w30, 1, 1
$lr = ORRXri $xzr, 1
bb.3:
liveins: $lr
$x20, $x19 = LDPXi $sp, 10
$w12 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 1
renamable $x9 = ADRP target-flags(aarch64-page) @x
$x12 = ADDXri $sp, 48, 0;
STRHHroW $w12, $x9, $w30, 1, 1
$lr = ORRXri $xzr, 1
$sp = ADDXri $sp, 16, 0
bb.4:
liveins: $lr
RET undef $lr
...
---
# This test ensures that we can avoid saving LR when it's available.
# It also makes sure that KILL instructions don't impact outlining.
#
# Input layout: bb.0 sets up the frame ($sp and $fp); bb.3 reloads $fp and
# $lr from the stack with LDPXi before returning. bb.1 contains four direct
# calls to baz, each followed by writes of 1 to $w11; the first run of writes
# has a KILL of $w11 in the middle, and single writes of 2 to $w11 and of 0
# to $w8 break up the repetition. bb.2 repeats one six-instruction $w15/$x15
# sequence twice, differing only in whether the zero goes to $w9 or $w8.
#
# Expected output for bb.1: two calls to the same outlined function, separated
# by the write of 2 to $w11 and followed by the write of 0 to $w8, with no
# direct call to baz before the first outlined call and no KILL of $w11 after
# the last checked instruction.
#
# CHECK-LABEL: bb.1:
# CHECK-NOT: BL @baz, implicit-def dead $lr, implicit $sp
# CHECK: BL @OUTLINED_FUNCTION_[[F1:[0-9]+]], implicit-def $lr, implicit $sp
# CHECK-NEXT: $w11 = ORRWri $wzr, 2
# CHECK-NEXT: BL @OUTLINED_FUNCTION_[[F1]], implicit-def $lr, implicit $sp
# CHECK-NEXT: $w8 = ORRWri $wzr, 0
# CHECK-NOT: $w11 = KILL renamable $w11, implicit killed $w11
name: bar
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $lr, $w8
$sp = frame-setup SUBXri $sp, 32, 0
$fp = frame-setup ADDXri $sp, 16, 0
bb.1:
BL @baz, implicit-def dead $lr, implicit $sp
$w11 = ORRWri $wzr, 1
$w11 = ORRWri $wzr, 1
$w11 = KILL renamable $w11, implicit killed $w11
$w11 = ORRWri $wzr, 1
$w11 = ORRWri $wzr, 1
BL @baz, implicit-def dead $lr, implicit $sp
$w11 = ORRWri $wzr, 1
$w11 = ORRWri $wzr, 1
$w11 = ORRWri $wzr, 2
BL @baz, implicit-def dead $lr, implicit $sp
$w11 = ORRWri $wzr, 1
$w11 = ORRWri $wzr, 1
$w11 = ORRWri $wzr, 1
$w11 = ORRWri $wzr, 1
BL @baz, implicit-def dead $lr, implicit $sp
$w11 = ORRWri $wzr, 1
$w11 = ORRWri $wzr, 1
$w8 = ORRWri $wzr, 0
bb.2:
$w15 = ORRWri $wzr, 1
$w15 = ORRWri $wzr, 1
$w15 = ORRWri $wzr, 1
$w15 = ORRWri $wzr, 1
$x15 = ADDXri $sp, 48, 0;
$w9 = ORRWri $wzr, 0
$w15 = ORRWri $wzr, 1
$w15 = ORRWri $wzr, 1
$w15 = ORRWri $wzr, 1
$w15 = ORRWri $wzr, 1
$x15 = ADDXri $sp, 48, 0;
$w8 = ORRWri $wzr, 0
bb.3:
$fp, $lr = LDPXi $sp, 2
$sp = ADDXri $sp, 32, 0
RET undef $lr
...
---
# baz is the target of the direct BL calls in bar; its body is a bare return.
name: baz
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $lr, $w8
RET undef $lr
# The output must define an outlined function, followed later by a second one
# whose index begins with a nonzero digit.
# CHECK-LABEL: name: OUTLINED_FUNCTION_{{[0-9]}}
# CHECK-LABEL: name: OUTLINED_FUNCTION_{{[1-9]}}