Our strategy for localizing globals in the entry block breaks down when we have large functions with high register pressure, using lots of globals. When this happens, our heuristics say that globals with many uses should not be localized, leading us to cause excessive spills and stack usage. These situations are also exacerbated by LTO, which tends to generate large functions. For now, moving to a strategy that's simpler and more akin to SelectionDAG fixes these issues and makes our codegen more similar. This has an overall neutral effect on size on CTMark, while showing slight improvements with -Os -flto on benchmarks. For low-level firmware software, though, we see big improvements. The reason this is neutral, and not an improvement, is that we give up the gains from CSE'ing globals in cases where we have low register pressure. I think this can be addressed in the future with some better heuristics. Differential Revision: https://reviews.llvm.org/D147484
93 lines
4.0 KiB
LLVM
93 lines
4.0 KiB
LLVM
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
|
|
; RUN: --debugify-and-strip-all-safe=0 \
|
|
; RUN: -verify-machineinstrs=0 -O0 \
|
|
; RUN: | FileCheck %s --check-prefixes=ENABLED,FALLBACK
|
|
|
|
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
|
|
; RUN: --debugify-and-strip-all-safe=0 \
|
|
; RUN: -verify-machineinstrs -O0 \
|
|
; RUN: | FileCheck %s --check-prefixes=ENABLED,FALLBACK,VERIFY,VERIFY-O0
|
|
|
|
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
|
|
; RUN: --debugify-and-strip-all-safe=0 \
|
|
; RUN: -verify-machineinstrs=0 -O0 -aarch64-enable-global-isel-at-O=0 -global-isel-abort=1 \
|
|
; RUN: | FileCheck %s --check-prefixes=ENABLED,NOFALLBACK
|
|
|
|
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
|
|
; RUN: --debugify-and-strip-all-safe=0 \
|
|
; RUN: -verify-machineinstrs=0 -O0 -aarch64-enable-global-isel-at-O=0 -global-isel-abort=2 \
|
|
; RUN: | FileCheck %s --check-prefixes=ENABLED,FALLBACK
|
|
|
|
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
|
|
; RUN: --debugify-and-strip-all-safe=0 \
|
|
; RUN: -verify-machineinstrs=0 -global-isel \
|
|
; RUN: | FileCheck %s --check-prefix ENABLED --check-prefix NOFALLBACK --check-prefix ENABLED-O1
|
|
|
|
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
|
|
; RUN: --debugify-and-strip-all-safe=0 \
|
|
; RUN: -verify-machineinstrs=0 -global-isel -global-isel-abort=2 \
|
|
; RUN: | FileCheck %s --check-prefix ENABLED --check-prefix FALLBACK --check-prefix ENABLED-O1
|
|
|
|
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
|
|
; RUN: --debugify-and-strip-all-safe=0 \
|
|
; RUN: -verify-machineinstrs=0 -O1 -aarch64-enable-global-isel-at-O=3 \
|
|
; RUN: | FileCheck %s --check-prefix ENABLED --check-prefix ENABLED-O1
|
|
|
|
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
|
|
; RUN: --debugify-and-strip-all-safe=0 \
|
|
; RUN: -verify-machineinstrs=0 -O1 -aarch64-enable-global-isel-at-O=0 \
|
|
; RUN: | FileCheck %s --check-prefix DISABLED
|
|
|
|
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
|
|
; RUN: --debugify-and-strip-all-safe=0 \
|
|
; RUN: -verify-machineinstrs=0 -aarch64-enable-global-isel-at-O=-1 \
|
|
; RUN: | FileCheck %s --check-prefix DISABLED
|
|
|
|
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
|
|
; RUN: --debugify-and-strip-all-safe=0 \
|
|
; RUN: -verify-machineinstrs=0 | FileCheck %s --check-prefix DISABLED
|
|
|
|
; RUN: llc -mtriple=aarch64-- -fast-isel=0 -global-isel=false \
|
|
; RUN: --debugify-and-strip-all-safe=0 \
|
|
; RUN: -debug-pass=Structure %s -o /dev/null 2>&1 -verify-machineinstrs=0 \
|
|
; RUN: | FileCheck %s --check-prefix DISABLED
|
|
|
|
; ENABLED: Safe Stack instrumentation pass
|
|
|
|
; ENABLED-O1: Basic Alias Analysis (stateless AA impl)
|
|
; ENABLED-O1-NEXT: Function Alias Analysis Results
|
|
; ENABLED: IRTranslator
|
|
; VERIFY-NEXT: Verify generated machine code
|
|
; ENABLED-NEXT: Analysis for ComputingKnownBits
|
|
; ENABLED-O1-NEXT: MachineDominator Tree Construction
|
|
; ENABLED-O1-NEXT: Analysis containing CSE Info
|
|
; ENABLED-O1-NEXT: PreLegalizerCombiner
|
|
; ENABLED-O1-NEXT: Localizer
|
|
; VERIFY-O0-NEXT: AArch64O0PreLegalizerCombiner
|
|
; VERIFY-NEXT: Verify generated machine code
|
|
; ENABLED-O1-NEXT: LoadStoreOpt
|
|
; ENABLED-O1-NEXT: Analysis containing CSE Info
|
|
; ENABLED: Legalizer
|
|
; VERIFY-NEXT: Verify generated machine code
|
|
; ENABLED: RegBankSelect
|
|
; VERIFY-NEXT: Verify generated machine code
|
|
; ENABLED-NEXT: Analysis for ComputingKnownBits
|
|
; ENABLED-O1-NEXT: Lazy Branch Probability Analysis
|
|
; ENABLED-O1-NEXT: Lazy Block Frequency Analysis
|
|
; ENABLED-NEXT: InstructionSelect
|
|
; ENABLED-O1-NEXT: AArch64 Post Select Optimizer
|
|
; VERIFY-NEXT: Verify generated machine code
|
|
; ENABLED-NEXT: ResetMachineFunction
|
|
|
|
; FALLBACK: AArch64 Instruction Selection
|
|
; NOFALLBACK-NOT: AArch64 Instruction Selection
|
|
|
|
; DISABLED-NOT: IRTranslator
|
|
|
|
; DISABLED: AArch64 Instruction Selection
|
|
; DISABLED: Finalize ISel and expand pseudo-instructions
|
|
|
|
; Minimal function whose body is a single `ret void`. Every RUN line above
; sends llc's output to /dev/null and pipes the -debug-pass=Structure listing
; to FileCheck, so the checks match pass names only, not generated code.
define void @empty() {
|
|
ret void
|
|
}
|