[RegAlloc][NewPM] Plug Greedy RA in codegen pipeline (#120557)
Use `-passes="regallocgreedy<[all|sgpr|wwm|vgpr]>"` to insert the greedy RA with a filter, and `-regalloc-npm=<type>` to control which RA is used in the existing pipeline.
This commit is contained in:
@@ -1063,7 +1063,9 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addMachineSSAOptimization(
|
||||
///
|
||||
/// A target that uses the standard regalloc pass order for fast or optimized
|
||||
/// allocation may still override this for per-target regalloc
|
||||
/// selection. But -regalloc=... always takes precedence.
|
||||
/// selection. But -regalloc-npm=... always takes precedence.
|
||||
/// If a target does not want to allow users to set -regalloc-npm=... at all,
|
||||
/// check if Opt.RegAlloc == RegAllocType::Unset.
|
||||
template <typename Derived, typename TargetMachineT>
|
||||
void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator(
|
||||
AddMachinePass &addPass, bool Optimized) const {
|
||||
@@ -1076,10 +1078,29 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator(
|
||||
/// Find and instantiate the register allocation pass requested by this target
|
||||
/// at the current optimization level. Different register allocators are
|
||||
/// defined as separate passes because they may require different analysis.
|
||||
///
|
||||
/// This helper ensures that the -regalloc-npm= option is always available,
|
||||
/// even for targets that override the default allocator.
|
||||
template <typename Derived, typename TargetMachineT>
|
||||
void CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass(
|
||||
AddMachinePass &addPass, bool Optimized) const {
|
||||
// TODO: Parse Opt.RegAlloc to add register allocator.
|
||||
// Use the specified -regalloc-npm={basic|greedy|fast|pbqp}
|
||||
// Enumerators declared after Default (Basic, Fast, Greedy, PBQP) denote an
// explicitly requested allocator; Unset and Default skip this branch and are
// handled by the target hook at the end of the function.
if (Opt.RegAlloc > RegAllocType::Default) {
|
||||
switch (Opt.RegAlloc) {
|
||||
case RegAllocType::Fast:
|
||||
addPass(RegAllocFastPass());
|
||||
break;
|
||||
case RegAllocType::Greedy:
|
||||
addPass(RAGreedyPass());
|
||||
break;
|
||||
// Basic and PBQP end up here: only Fast and Greedy are wired up above.
default:
|
||||
report_fatal_error("register allocator not supported yet", false);
|
||||
}
|
||||
// An explicit allocator was added, so the target hook below is not consulted.
return;
|
||||
}
|
||||
// -regalloc-npm=default or unspecified, so pick based on the optimization
// level
|
||||
// or ask the target for the regalloc pass.
|
||||
derived().addTargetRegisterAllocator(addPass, Optimized);
|
||||
}
|
||||
|
||||
template <typename Derived, typename TargetMachineT>
|
||||
@@ -1150,20 +1171,22 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
|
||||
// PreRA instruction scheduling.
|
||||
addPass(MachineSchedulerPass(&TM));
|
||||
|
||||
if (derived().addRegAssignmentOptimized(addPass)) {
|
||||
// Allow targets to expand pseudo instructions depending on the choice of
|
||||
// registers before MachineCopyPropagation.
|
||||
derived().addPostRewrite(addPass);
|
||||
|
||||
// Copy propagate to forward register uses and try to eliminate COPYs that
|
||||
// were not coalesced.
|
||||
addPass(MachineCopyPropagationPass());
|
||||
|
||||
// Run post-ra machine LICM to hoist reloads / remats.
|
||||
//
|
||||
// FIXME: can this move into MachineLateOptimization?
|
||||
addPass(MachineLICMPass());
|
||||
if (auto E = derived().addRegAssignmentOptimized(addPass)) {
|
||||
// addRegAssignmentOptimized did not add a reg alloc pass, so do nothing.
|
||||
return;
|
||||
}
|
||||
// Allow targets to expand pseudo instructions depending on the choice of
|
||||
// registers before MachineCopyPropagation.
|
||||
derived().addPostRewrite(addPass);
|
||||
|
||||
// Copy propagate to forward register uses and try to eliminate COPYs that
|
||||
// were not coalesced.
|
||||
addPass(MachineCopyPropagationPass());
|
||||
|
||||
// Run post-ra machine LICM to hoist reloads / remats.
|
||||
//
|
||||
// FIXME: can this move into MachineLateOptimization?
|
||||
addPass(MachineLICMPass());
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
@@ -196,13 +196,12 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
|
||||
},
|
||||
"filter=reg-filter;no-clear-vregs")
|
||||
|
||||
// 'all' is the default filter.
|
||||
MACHINE_FUNCTION_PASS_WITH_PARAMS(
|
||||
"greedy", "RAGreedyPass",
|
||||
[](RAGreedyPass::Options Opts) { return RAGreedyPass(Opts); },
|
||||
[PB = this](StringRef Params) {
|
||||
// TODO: parseRegAllocGreedyFilterFunc(*PB, Params);
|
||||
(void)PB;
|
||||
return Expected<RAGreedyPass::Options>(RAGreedyPass::Options{});
|
||||
return parseRegAllocGreedyFilterFunc(*PB, Params);
|
||||
}, "reg-filter"
|
||||
)
|
||||
#undef MACHINE_FUNCTION_PASS_WITH_PARAMS
|
||||
|
||||
@@ -14,13 +14,29 @@
|
||||
#ifndef LLVM_TARGET_CGPASSBUILDEROPTION_H
|
||||
#define LLVM_TARGET_CGPASSBUILDEROPTION_H
|
||||
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
#include <optional>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
enum class RunOutliner { TargetDefault, AlwaysOutline, NeverOutline };
|
||||
enum class RegAllocType { Default, Basic, Fast, Greedy, PBQP };
|
||||
enum class RegAllocType { Unset, Default, Basic, Fast, Greedy, PBQP };
|
||||
|
||||
/// Command-line parser that maps register allocator names to RegAllocType
/// values. It registers the names "default", "pbqp", "fast", "basic" and
/// "greedy"; no name is registered for RegAllocType::Unset, so that value
/// cannot be selected by name through this parser.
class RegAllocTypeParser : public cl::parser<RegAllocType> {
|
||||
public:
|
||||
RegAllocTypeParser(cl::Option &O) : cl::parser<RegAllocType>(O) {}
|
||||
/// Runs the base parser's initialization, then registers one literal option
/// per allocator name together with its help text.
void initialize() {
|
||||
cl::parser<RegAllocType>::initialize();
|
||||
addLiteralOption("default", RegAllocType::Default,
|
||||
"Default register allocator");
|
||||
addLiteralOption("pbqp", RegAllocType::PBQP, "PBQP register allocator");
|
||||
addLiteralOption("fast", RegAllocType::Fast, "Fast register allocator");
|
||||
addLiteralOption("basic", RegAllocType::Basic, "Basic register allocator");
|
||||
addLiteralOption("greedy", RegAllocType::Greedy,
|
||||
"Greedy register allocator");
|
||||
}
|
||||
};
|
||||
|
||||
// Not one-on-one but mostly corresponding to commandline options in
|
||||
// TargetPassConfig.cpp.
|
||||
@@ -52,7 +68,7 @@ struct CGPassBuilderOption {
|
||||
bool RequiresCodeGenSCCOrder = false;
|
||||
|
||||
RunOutliner EnableMachineOutliner = RunOutliner::TargetDefault;
|
||||
StringRef RegAlloc = "default";
|
||||
RegAllocType RegAlloc = RegAllocType::Unset;
|
||||
std::optional<GlobalISelAbortMode> EnableGlobalISelAbort;
|
||||
std::string FSProfileFile;
|
||||
std::string FSRemappingFile;
|
||||
|
||||
@@ -1415,6 +1415,20 @@ parseBoundsCheckingOptions(StringRef Params) {
|
||||
return Options;
|
||||
}
|
||||
|
||||
/// Parses the parameter text of the greedy register allocator pass into
/// RAGreedyPass::Options.
///
/// \param PB     Pass builder whose parseRegAllocFilter() resolves filter names.
/// \param Params Parameter text taken from the pass pipeline string.
/// \returns Default-constructed options when \p Params is empty or "all";
///          options built from the resolved filter and the original text when
///          the filter is recognized; otherwise a StringError that quotes the
///          rejected text.
Expected<RAGreedyPass::Options>
|
||||
parseRegAllocGreedyFilterFunc(PassBuilder &PB, StringRef Params) {
|
||||
// No filter requested: fall back to the default options.
if (Params.empty() || Params == "all")
|
||||
return RAGreedyPass::Options();
|
||||
|
||||
std::optional<RegAllocFilterFunc> Filter = PB.parseRegAllocFilter(Params);
|
||||
if (Filter)
|
||||
return RAGreedyPass::Options{*Filter, Params};
|
||||
|
||||
// The pass builder did not recognize the filter name.
return make_error<StringError>(
|
||||
formatv("invalid regallocgreedy register filter '{0}' ", Params).str(),
|
||||
inconvertibleErrorCode());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/// Tests whether a pass name starts with a valid prefix for a default pipeline
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
|
||||
# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
|
||||
# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -passes=greedy -o - %s | FileCheck %s
|
||||
|
||||
--- |
|
||||
define void @inst_stores_to_dead_spill_implicit_def_impdef() {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
|
||||
# RUN: llc -mtriple=arm64-apple-ios -run-pass=greedy -o - %s | FileCheck %s
|
||||
# RUN: llc -mtriple=arm64-apple-ios -passes=greedy -o - %s | FileCheck %s
|
||||
|
||||
---
|
||||
name: widget
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
# RUN: llc -mtriple=aarch64-unknown-fuchsia -run-pass=greedy -verify-machineinstrs -o - %s | FileCheck %s
|
||||
# RUN: llc -mtriple=aarch64-unknown-fuchsia -passes=greedy -verify-machineinstrs -o - %s | FileCheck %s
|
||||
|
||||
# Check that we spill %31 and do not rematerialize it since the use operand
|
||||
# of ADDXri is killed by the STRXui in this block.
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
|
||||
# RUN: llc -mtriple=aarch64_be-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
|
||||
# RUN: llc -mtriple=aarch64-none-linux-gnu -passes=greedy -o - %s | FileCheck %s
|
||||
# RUN: llc -mtriple=aarch64_be-none-linux-gnu -passes=greedy -o - %s | FileCheck %s
|
||||
--- |
|
||||
define i64 @test_subreg_spill_fold() { ret i64 0 }
|
||||
define i64 @test_subreg_spill_fold2() { ret i64 0 }
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
# RUN: llc -run-pass=regallocbasic -debug-pass=Arguments -o - %s | FileCheck %s
|
||||
# RUN: llc -run-pass=regallocfast -debug-pass=Arguments -o - %s | FileCheck %s
|
||||
# RUN: llc -passes=regallocfast -o - %s | FileCheck %s
|
||||
# RUN: llc -passes=greedy -o - %s | FileCheck %s
|
||||
|
||||
# Check that passes are initialized correctly, so that it's possible to
|
||||
# use -run-pass.
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#RUN: llc -o - %s -mtriple=s390x-ibm-linux -run-pass=greedy
|
||||
#RUN: llc -o - %s -mtriple=s390x-ibm-linux -passes=greedy
|
||||
#PR34502. Check HoistSpill works properly after the live range of spilled
|
||||
#virtual register is cleared.
|
||||
--- |
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass greedy %s -o - | FileCheck %s
|
||||
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass regallocfast %s -o - | FileCheck %s --check-prefix=FAST
|
||||
# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=regallocfast %s -o - | FileCheck %s --check-prefix=FAST
|
||||
# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=greedy %s -o - | FileCheck %s
|
||||
|
||||
...
|
||||
---
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# REQUIRES: asserts
|
||||
# RUN: llc -mtriple=x86_64-- -run-pass=greedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s
|
||||
# RUN: llc -mtriple=x86_64-- -passes=greedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s
|
||||
# Check no global region split is needed because the live range to split is trivially rematerializable.
|
||||
# CHECK-NOT: Compact region bundles
|
||||
--- |
|
||||
|
||||
6
llvm/test/tools/llc/new-pm/x86_64-regalloc-pipeline.mir
Normal file
6
llvm/test/tools/llc/new-pm/x86_64-regalloc-pipeline.mir
Normal file
@@ -0,0 +1,6 @@
|
||||
# REQUIRES: x86-registered-target
|
||||
# RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-new-pm -O3 -regalloc-npm=fast -print-pipeline-passes %s 2>&1 | FileCheck %s
|
||||
# RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-new-pm -O3 -regalloc-npm=greedy -print-pipeline-passes %s 2>&1 | FileCheck %s --check-prefix=CHECK-GREEDY
|
||||
|
||||
# CHECK: regallocfast
|
||||
# CHECK-GREEDY: greedy<all>
|
||||
@@ -48,10 +48,10 @@
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<std::string>
|
||||
static cl::opt<RegAllocType, false, RegAllocTypeParser>
|
||||
RegAlloc("regalloc-npm",
|
||||
cl::desc("Register allocator to use for new pass manager"),
|
||||
cl::Hidden, cl::init("default"));
|
||||
cl::Hidden, cl::init(RegAllocType::Unset));
|
||||
|
||||
static cl::opt<bool>
|
||||
DebugPM("debug-pass-manager", cl::Hidden,
|
||||
|
||||
Reference in New Issue
Block a user