This patch is episode three of the middle end implementation for the coroutine HALO improvement project published on discourse: https://discourse.llvm.org/t/language-extension-for-better-more-deterministic-halo-for-c-coroutines/80044 After we attribute the calls to some coroutines as "coro_elide_safe" in the C++ FE and creating a `noalloc` ramp function, we use a new middle end pass to move the call to coroutines to the noalloc variant. This pass should be run after CoroSplit. For each node we process in CoroSplit, we look for its callers and replace the attributed ones in presplit coroutines to the noalloc one. The transformed `noalloc` ramp function will also require a frame pointer to a block of memory it can use as an activation frame. We allocate this on the caller's frame with an alloca. Please note that we cannot safely transform such attributed calls in post-split coroutines due to memory lifetime reasons. The CoroSplit pass is responsible for creating the coroutine frame spills for all the allocas in the coroutine. Therefore it will be unsafe to create new allocas like this one in post-split coroutines. This happens relatively rarely because CGSCC performs the passes on the callees before the caller. However, if multiple coroutines coexist in one SCC, this situation does happen (and prevents us from having potentially unbound frame size due to recursion.) You can find episode 1: Clang FE of this patch series at https://github.com/llvm/llvm-project/pull/99282 Episode 2: CoroSplit at https://github.com/llvm/llvm-project/pull/99283
235 lines
12 KiB
LLVM
235 lines
12 KiB
LLVM
; Validate ThinLTO postlink pipeline when we have Sample PGO
|
|
;
|
|
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
|
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
|
|
; RUN: -passes='thinlto<O1>' -S %s 2>&1 \
|
|
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,%llvmcheckext
|
|
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
|
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
|
|
; RUN: -passes='thinlto<O2>' -S %s 2>&1 \
|
|
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext
|
|
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -passes-ep-pipeline-start='no-op-module' \
|
|
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
|
|
; RUN: -passes='thinlto<O3>' -S %s 2>&1 \
|
|
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,%llvmcheckext
|
|
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
|
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
|
|
; RUN: -passes='thinlto<Os>' -S %s 2>&1 \
|
|
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-O23SZ,%llvmcheckext
|
|
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
|
|
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
|
|
; RUN: -passes='thinlto<Oz>' -S %s 2>&1 \
|
|
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,%llvmcheckext
|
|
; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -debug-info-for-profiling \
|
|
; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \
|
|
; RUN: -passes='thinlto<O2>' -S %s 2>&1 \
|
|
; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext
|
|
|
|
; Suppress FileCheck --allow-unused-prefixes=false diagnostics.
|
|
; CHECK-NOEXT: {{^}}
|
|
|
|
; CHECK-EP-PIPELINE-START: Running pass: NoOpModulePass
|
|
; CHECK-O: Running pass: SampleProfileLoaderPass
|
|
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
|
|
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
|
|
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
|
|
; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
|
|
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
|
|
; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion
|
|
; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
|
|
; CHECK-O-NEXT: Running pass: OpenMPOptPass
|
|
; CHECK-O-NEXT: Running pass: LowerTypeTestsPass
|
|
; CHECK-O-NEXT: Running pass: IPSCCPPass
|
|
; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
|
|
; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
|
|
; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
|
|
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
|
|
; CHECK-O-NEXT: Running pass: GlobalOptPass
|
|
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
|
|
; CHECK-O-NEXT: Running pass: PromotePass
|
|
; CHECK-O-NEXT: Running pass: InstCombinePass
|
|
; CHECK-O-NEXT: Running analysis: AAManager on foo
|
|
; CHECK-O-NEXT: Running analysis: BasicAA
|
|
; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA
|
|
; CHECK-O-NEXT: Running analysis: TypeBasedAA
|
|
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
|
; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo
|
|
; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis on foo
|
|
; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo
|
|
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
|
|
; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo
|
|
|
|
; CHECK-O-NEXT: Running pass: PGOForceFunctionAttrsPass
|
|
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
|
|
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
|
|
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
|
|
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
|
|
; CHECK-O-NEXT: Running analysis: GlobalsAA
|
|
; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
|
|
; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}AAManager
|
|
; CHECK-O-NEXT: Invalidating analysis: AAManager
|
|
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
|
|
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
|
|
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
|
|
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
|
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass
|
|
; CHECK-O-NEXT: Running pass: InlinerPass
|
|
; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
|
|
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
|
|
; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass
|
|
; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass
|
|
; CHECK-O-NEXT: Running pass: SROAPass
|
|
; CHECK-O-NEXT: Running pass: EarlyCSEPass
|
|
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
|
|
; CHECK-O-NEXT: Running analysis: AAManager
|
|
; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
|
|
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
|
|
; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
|
|
; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass
|
|
; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
|
|
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
|
; CHECK-O-NEXT: Running pass: InstCombinePass
|
|
; CHECK-O23SZ-NEXT: Running pass: AggressiveInstCombinePass
|
|
; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass
|
|
; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass
|
|
; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass
|
|
; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass
|
|
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
|
; CHECK-O-NEXT: Running pass: ReassociatePass
|
|
; CHECK-O23SZ-NEXT: Running pass: ConstraintEliminationPass
|
|
; CHECK-O23SZ-NEXT: Running analysis: ScalarEvolutionAnalysis
|
|
; CHECK-O-NEXT: Running pass: LoopSimplifyPass
|
|
; CHECK-O-NEXT: Running pass: LCSSAPass
|
|
; CHECK-O1-NEXT: Running analysis: ScalarEvolutionAnalysis
|
|
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
|
|
; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass
|
|
; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass
|
|
; CHECK-O-NEXT: Running pass: LICM
|
|
; CHECK-O-NEXT: Running pass: LoopRotatePass
|
|
; CHECK-O-NEXT: Running pass: LICM
|
|
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
|
|
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
|
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
|
; CHECK-O-NEXT: Running pass: InstCombinePass
|
|
; CHECK-O-NEXT: Running pass: LoopSimplifyPass
|
|
; CHECK-O-NEXT: Running pass: LCSSAPass
|
|
; CHECK-O-NEXT: Running pass: LoopIdiomRecognizePass
|
|
; CHECK-O-NEXT: Running pass: IndVarSimplifyPass
|
|
; CHECK-O-NEXT: Running pass: LoopDeletionPass
|
|
; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
|
|
; CHECK-O-NEXT: Running pass: SROAPass on foo
|
|
; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
|
|
; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
|
|
; CHECK-O23SZ-NEXT: Running pass: GVNPass
|
|
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
|
|
; CHECK-O1-NEXT: Running pass: MemCpyOptPass
|
|
; CHECK-O-NEXT: Running pass: SCCPPass
|
|
; CHECK-O-NEXT: Running pass: BDCEPass
|
|
; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
|
|
; CHECK-O-NEXT: Running pass: InstCombinePass
|
|
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
|
|
; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
|
|
; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass
|
|
; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
|
|
; CHECK-O1-NEXT: Running pass: CoroElidePass
|
|
; CHECK-O-NEXT: Running pass: ADCEPass
|
|
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
|
|
; CHECK-O23SZ-NEXT: Running pass: DSEPass
|
|
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass
|
|
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
|
|
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
|
|
; CHECK-O23SZ-NEXT: Running pass: LICMPass
|
|
; CHECK-O23SZ-NEXT: Running pass: CoroElidePass
|
|
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
|
; CHECK-O-NEXT: Running pass: InstCombinePass
|
|
; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
|
|
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis
|
|
; CHECK-O-NEXT: Running analysis: ShouldNotRunFunctionPassesAnalysis
|
|
; CHECK-O-NEXT: Running pass: CoroSplitPass
|
|
; CHECK-O-NEXT: Running pass: CoroAnnotationElidePass
|
|
; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis
|
|
; CHECK-O-NEXT: Invalidating analysis: ShouldNotRunFunctionPassesAnalysis
|
|
; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
|
|
; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass
|
|
; CHECK-O-NEXT: Running pass: CoroCleanupPass
|
|
; CHECK-O-NEXT: Running pass: GlobalOptPass
|
|
; CHECK-O-NEXT: Running pass: GlobalDCEPass
|
|
; CHECK-O-NEXT: Running pass: EliminateAvailableExternallyPass
|
|
; CHECK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass
|
|
; CHECK-O-NEXT: Running pass: RecomputeGlobalsAAPass
|
|
; CHECK-O-NEXT: Running pass: Float2IntPass
|
|
; CHECK-O-NEXT: Running pass: LowerConstantIntrinsicsPass
|
|
; CHECK-O3-NEXT: Running pass: ControlHeightReductionPass
|
|
; CHECK-O3-NEXT: Running analysis: RegionInfoAnalysis
|
|
; CHECK-O3-NEXT: Running analysis: DominanceFrontierAnalysis
|
|
; CHECK-EXT: Running pass: {{.*}}::Bye
|
|
; CHECK-O-NEXT: Running pass: LoopSimplifyPass
|
|
; CHECK-O-NEXT: Running pass: LCSSAPass
|
|
; CHECK-O-NEXT: Running pass: LoopRotatePass
|
|
; CHECK-O-NEXT: Running pass: LoopDeletionPass
|
|
; CHECK-O-NEXT: Running pass: LoopDistributePass
|
|
; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis
|
|
; CHECK-O-NEXT: Running pass: InjectTLIMappings
|
|
; CHECK-O-NEXT: Running pass: LoopVectorizePass
|
|
; CHECK-O-NEXT: Running pass: InferAlignmentPass
|
|
; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass
|
|
; CHECK-O-NEXT: Running pass: InstCombinePass
|
|
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
|
; CHECK-O2-NEXT: Running pass: SLPVectorizerPass
|
|
; CHECK-O3-NEXT: Running pass: SLPVectorizerPass
|
|
; CHECK-Os-NEXT: Running pass: SLPVectorizerPass
|
|
; CHECK-O-NEXT: Running pass: VectorCombinePass
|
|
; CHECK-O-NEXT: Running pass: InstCombinePass
|
|
; CHECK-O-NEXT: Running pass: LoopUnrollPass
|
|
; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass
|
|
; CHECK-O-NEXT: Running pass: SROAPass
|
|
; CHECK-O-NEXT: Running pass: InferAlignmentPass
|
|
; CHECK-O-NEXT: Running pass: InstCombinePass
|
|
; CHECK-O-NEXT: Running pass: LoopSimplifyPass
|
|
; CHECK-O-NEXT: Running pass: LCSSAPass
|
|
; CHECK-O-NEXT: Running pass: LICMPass
|
|
; CHECK-O-NEXT: Running pass: AlignmentFromAssumptionsPass
|
|
; CHECK-O-NEXT: Running pass: LoopSinkPass
|
|
; CHECK-O-NEXT: Running pass: InstSimplifyPass
|
|
; CHECK-O-NEXT: Running pass: DivRemPairsPass
|
|
; CHECK-O-NEXT: Running pass: TailCallElimPass
|
|
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
|
; CHECK-O-NEXT: Running pass: GlobalDCEPass
|
|
; CHECK-O-NEXT: Running pass: ConstantMergePass
|
|
; CHECK-O-NEXT: Running pass: CGProfilePass
|
|
; CHECK-O-NEXT: Running pass: RelLookupTableConverterPass
|
|
; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo
|
|
; CHECK-O-NEXT: Running pass: PrintModulePass
|
|
|
|
; Make sure we get the IR back out without changes when we print the module.
|
|
; CHECK-O-LABEL: define void @foo(i32 %n) local_unnamed_addr
|
|
; CHECK-O-NEXT: entry:
|
|
; CHECK-O-NEXT: br label %loop
|
|
; CHECK-O: loop:
|
|
; CHECK-O-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
|
|
; CHECK-O-NEXT: %iv.next = add i32 %iv, 1
|
|
; CHECK-O-NEXT: tail call void @bar()
|
|
; CHECK-O-NEXT: %cmp = icmp eq i32 %iv, %n
|
|
; CHECK-O-NEXT: br i1 %cmp, label %exit, label %loop
|
|
; CHECK-O: exit:
|
|
; CHECK-O-NEXT: ret void
|
|
; CHECK-O-NEXT: }
|
|
;
|
|
; Ignore a bunch of intervening metadata containing profile data.
|
|
;
|
|
|
|
declare void @bar() local_unnamed_addr
|
|
|
|
define void @foo(i32 %n) local_unnamed_addr {
|
|
entry:
|
|
br label %loop
|
|
loop:
|
|
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%iv.next = add i32 %iv, 1
|
|
tail call void @bar()
|
|
%cmp = icmp eq i32 %iv, %n
|
|
br i1 %cmp, label %exit, label %loop
|
|
exit:
|
|
ret void
|
|
}
|