[Clang] Make --lto-partitions only default for HIP (#133164)

Summary:
The default behavior for LTO on other targets does not specify the
number of LTO partitions. Recent changes made this default to 8 on
AMDGPU which had some issues with the `libc` project. The option to
disable this is HIP only so I think for now we should restrict this just
to HIP.

I'm definitely on board with getting some more parallelism here, but I
think it should probably be restricted to just offloading languages. The
new driver goes through the `--target=amdgcn-amd-amdhsa` for its output,
which means we'd need to forward the default somehow.
This commit is contained in:
Joseph Huber
2025-03-27 07:34:57 -05:00
committed by GitHub
parent 17d0569538
commit d0aa1f9c43
4 changed files with 28 additions and 37 deletions

View File

@@ -633,7 +633,7 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const bool ThinLTO = (C.getDriver().getLTOMode() == LTOK_Thin);
addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0], ThinLTO);
if (!ThinLTO)
if (!ThinLTO && JA.getOffloadingDeviceKind() == Action::OFK_HIP)
addFullLTOPartitionOption(C.getDriver(), Args, CmdArgs);
} else if (Args.hasArg(options::OPT_mcpu_EQ)) {
CmdArgs.push_back(Args.MakeArgString(
@@ -712,14 +712,12 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
}
static unsigned getFullLTOPartitions(const Driver &D, const ArgList &Args) {
const Arg *A = Args.getLastArg(options::OPT_flto_partitions_EQ);
// In the absence of an option, use 8 as the default.
if (!A)
return 8;
int Value = 0;
if (StringRef(A->getValue()).getAsInteger(10, Value) || (Value < 1)) {
StringRef A = Args.getLastArgValue(options::OPT_flto_partitions_EQ, "8");
if (A.getAsInteger(10, Value) || (Value < 1)) {
Arg *Arg = Args.getLastArg(options::OPT_flto_partitions_EQ);
D.Diag(diag::err_drv_invalid_int_value)
<< A->getAsString(Args) << A->getValue();
<< Arg->getAsString(Args) << Arg->getValue();
return 1;
}
@@ -729,13 +727,8 @@ static unsigned getFullLTOPartitions(const Driver &D, const ArgList &Args) {
void amdgpu::addFullLTOPartitionOption(const Driver &D,
const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) {
// TODO: Should this be restricted to fgpu-rdc only ? Currently we'll
// also do it for non gpu-rdc LTO
if (unsigned NumParts = getFullLTOPartitions(D, Args); NumParts > 1) {
CmdArgs.push_back(
Args.MakeArgString("--lto-partitions=" + Twine(NumParts)));
}
CmdArgs.push_back(Args.MakeArgString("--lto-partitions=" +
Twine(getFullLTOPartitions(D, Args))));
}
/// AMDGPU Toolchain

View File

@@ -21,7 +21,7 @@
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+:sramecc- -nogpulib \
// RUN: -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
// LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"--lto-partitions={{[0-9]+}}"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"
// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+:sramecc- -nogpulib \
// RUN: -L. -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=MCPU %s
@@ -38,17 +38,3 @@
// RUN: %clang -target amdgcn-amd-amdhsa -march=gfx90a -stdlib -startfiles \
// RUN: -nogpulib -nogpuinc -### %s 2>&1 | FileCheck -check-prefix=STARTUP %s
// STARTUP: ld.lld{{.*}}"-lc" "-lm" "{{.*}}crt1.o"
// Check --flto-partitions
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
// RUN: -L. -flto --flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s
// LTO_PARTS: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"--lto-partitions=42"
// RUN: not %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
// RUN: -L. -flto --flto-partitions=a %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV0 %s
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '--flto-partitions=a'
// RUN: not %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
// RUN: -L. -flto --flto-partitions=0 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV1 %s
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '--flto-partitions=0'

View File

@@ -161,3 +161,21 @@
// output the executable
// LNX: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
// MSVC: [[LD:".*lld-link.*"]] {{.*}}"-out:a.exe" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
// Check --flto-partitions
// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
// RUN: -L. -foffload-lto %s 2>&1 | FileCheck -check-prefix=LTO_DEFAULT %s
// LTO_DEFAULT: lld{{.*}}"--lto-partitions=8"
// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
// RUN: -L. -foffload-lto --flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s
// LTO_PARTS: lld{{.*}}"--lto-partitions=42"
// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
// RUN: -L. -foffload-lto --flto-partitions=a %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV0 %s
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '--flto-partitions=a'
// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
// RUN: -L. -foffload-lto --flto-partitions=0 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV1 %s
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '--flto-partitions=0'

View File

@@ -33,14 +33,8 @@ function(add_startup_object name)
set_target_properties(${fq_target_name}.exe PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${LIBC_LIBRARY_DIR}
RUNTIME_OUTPUT_NAME ${name}.o)
# FIXME: A bug in the AMDGPU LTO pass is incorrectly removing the kernels.
if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
target_link_options(${fq_target_name}.exe PRIVATE
"-r" "-nostdlib" "-flto" "-Wl,--lto-emit-llvm")
else()
target_link_options(${fq_target_name}.exe PRIVATE
"-r" "-nostdlib" "-Wl,--lto-emit-llvm")
endif()
target_link_options(${fq_target_name}.exe PRIVATE
"-r" "-nostdlib" "-flto" "-Wl,--lto-emit-llvm")
endif()
endfunction()