[OpenMP] Add OpenMP offloading toolchain for AMDGPU

This patch adds AMDGPUOpenMPToolChain for supporting OpenMP
offloading to AMD GPU's.

Originally authored by Greg Rodgers

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D94961
This commit is contained in:
Pushpinder Singh
2021-01-19 13:43:47 +05:30
committed by Pushpinder Singh
parent c7189ba785
commit fcf03e7280
7 changed files with 429 additions and 16 deletions

View File

@@ -36,6 +36,7 @@ add_clang_library(clangDriver
ToolChains/AIX.cpp
ToolChains/Ananas.cpp
ToolChains/AMDGPU.cpp
ToolChains/AMDGPUOpenMP.cpp
ToolChains/AVR.cpp
ToolChains/BareMetal.cpp
ToolChains/Clang.cpp

View File

@@ -10,6 +10,7 @@
#include "InputInfo.h"
#include "ToolChains/AIX.h"
#include "ToolChains/AMDGPU.h"
#include "ToolChains/AMDGPUOpenMP.h"
#include "ToolChains/AVR.h"
#include "ToolChains/Ananas.h"
#include "ToolChains/BareMetal.h"
@@ -739,18 +740,27 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
Diag(clang::diag::err_drv_invalid_omp_target) << Val;
else {
const ToolChain *TC;
// CUDA toolchains have to be selected differently. They pair host
// Device toolchains have to be selected differently. They pair host
// and device in their implementation.
if (TT.isNVPTX()) {
if (TT.isNVPTX() || TT.isAMDGCN()) {
const ToolChain *HostTC =
C.getSingleOffloadToolChain<Action::OFK_Host>();
assert(HostTC && "Host toolchain should be always defined.");
auto &CudaTC =
auto &DeviceTC =
ToolChains[TT.str() + "/" + HostTC->getTriple().normalize()];
if (!CudaTC)
CudaTC = std::make_unique<toolchains::CudaToolChain>(
*this, TT, *HostTC, C.getInputArgs(), Action::OFK_OpenMP);
TC = CudaTC.get();
if (!DeviceTC) {
if (TT.isNVPTX())
DeviceTC = std::make_unique<toolchains::CudaToolChain>(
*this, TT, *HostTC, C.getInputArgs(), Action::OFK_OpenMP);
else if (TT.isAMDGCN())
DeviceTC =
std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
*this, TT, *HostTC, C.getInputArgs());
else
assert(DeviceTC && "Device toolchain not defined.");
}
TC = DeviceTC.get();
} else
TC = &getToolChain(C.getInputArgs(), TT);
C.addOffloadDeviceToolChain(TC, Action::OFK_OpenMP);

View File

@@ -64,6 +64,13 @@ public:
bool IsIntegratedAssemblerDefault() const override { return true; }
bool IsMathErrnoDefault() const override { return false; }
bool useIntegratedAs() const override { return true; }
bool isCrossCompiling() const override { return true; }
bool isPICDefault() const override { return false; }
bool isPIEDefault() const override { return false; }
bool isPICDefaultForced() const override { return false; }
bool SupportsProfiling() const override { return false; }
llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const override;

View File

@@ -0,0 +1,262 @@
//===- AMDGPUOpenMP.cpp - AMDGPUOpenMP ToolChain Implementation -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AMDGPUOpenMP.h"
#include "AMDGPU.h"
#include "CommonArgs.h"
#include "InputInfo.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Options.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
using namespace clang::driver;
using namespace clang::driver::toolchains;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
namespace {
static const char *getOutputFileName(Compilation &C, StringRef Base,
const char *Postfix,
const char *Extension) {
const char *OutputFileName;
if (C.getDriver().isSaveTempsEnabled()) {
OutputFileName =
C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension);
} else {
std::string TmpName =
C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension);
OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName));
}
return OutputFileName;
}
static void addLLCOptArg(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) {
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
StringRef OOpt = "0";
if (A->getOption().matches(options::OPT_O4) ||
A->getOption().matches(options::OPT_Ofast))
OOpt = "3";
else if (A->getOption().matches(options::OPT_O0))
OOpt = "0";
else if (A->getOption().matches(options::OPT_O)) {
// Clang and opt support -Os/-Oz; llc only supports -O0, -O1, -O2 and -O3
// so we map -Os/-Oz to -O2.
// Only clang supports -Og, and maps it to -O1.
// We map anything else to -O2.
OOpt = llvm::StringSwitch<const char *>(A->getValue())
.Case("1", "1")
.Case("2", "2")
.Case("3", "3")
.Case("s", "2")
.Case("z", "2")
.Case("g", "1")
.Default("0");
}
CmdArgs.push_back(Args.MakeArgString("-O" + OOpt));
}
}
} // namespace
const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
const ArgList &Args, StringRef SubArchName,
StringRef OutputFilePrefix) const {
ArgStringList CmdArgs;
for (const auto &II : Inputs)
if (II.isFilename())
CmdArgs.push_back(II.getFilename());
// Add an intermediate output file.
CmdArgs.push_back("-o");
const char *OutputFileName =
getOutputFileName(C, OutputFilePrefix, "-linked", "bc");
CmdArgs.push_back(OutputFileName);
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("llvm-link"));
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs,
InputInfo(&JA, Args.MakeArgString(OutputFileName))));
return OutputFileName;
}
const char *AMDGCN::OpenMPLinker::constructLlcCommand(
Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
llvm::StringRef OutputFilePrefix, const char *InputFileName,
bool OutputIsAsm) const {
// Construct llc command.
ArgStringList LlcArgs;
// The input to llc is the output from opt.
LlcArgs.push_back(InputFileName);
// Pass optimization arg to llc.
addLLCOptArg(Args, LlcArgs);
LlcArgs.push_back("-mtriple=amdgcn-amd-amdhsa");
LlcArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName));
LlcArgs.push_back(
Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj")));
for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
LlcArgs.push_back(A->getValue(0));
}
// Add output filename
LlcArgs.push_back("-o");
const char *LlcOutputFile =
getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o");
LlcArgs.push_back(LlcOutputFile);
const char *Llc = Args.MakeArgString(getToolChain().GetProgramPath("llc"));
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::AtFileCurCP(), Llc, LlcArgs, Inputs,
InputInfo(&JA, Args.MakeArgString(LlcOutputFile))));
return LlcOutputFile;
}
void AMDGCN::OpenMPLinker::constructLldCommand(
Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
const InputInfo &Output, const llvm::opt::ArgList &Args,
const char *InputFileName) const {
// Construct lld command.
// The output from ld.lld is an HSA code object file.
ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined",
"-shared", "-o", Output.getFilename(),
InputFileName};
const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::AtFileCurCP(), Lld, LldArgs, Inputs,
InputInfo(&JA, Args.MakeArgString(Output.getFilename()))));
}
// For amdgcn the inputs of the linker job are device bitcode and output is
// object file. It calls llvm-link, opt, llc, then lld steps.
void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target");
StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
assert(GPUArch.startswith("gfx") && "Unsupported sub arch");
// Prefix for temporary file name.
std::string Prefix;
for (const auto &II : Inputs)
if (II.isFilename())
Prefix =
llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch.str();
assert(Prefix.length() && "no linker inputs are files ");
// Each command outputs different files.
const char *LLVMLinkCommand =
constructLLVMLinkCommand(C, JA, Inputs, Args, GPUArch, Prefix);
const char *LlcCommand = constructLlcCommand(C, JA, Inputs, Args, GPUArch,
Prefix, LLVMLinkCommand);
constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand);
}
AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D,
const llvm::Triple &Triple,
const ToolChain &HostTC,
const ArgList &Args)
: ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
// Lookup binaries into the driver directory, this is used to
// discover the clang-offload-bundler executable.
getProgramPaths().push_back(getDriver().Dir);
}
void AMDGPUOpenMPToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadingKind) const {
HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
assert(DeviceOffloadingKind == Action::OFK_OpenMP &&
"Only OpenMP offloading kinds are supported.");
CC1Args.push_back("-target-cpu");
CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch));
CC1Args.push_back("-fcuda-is-device");
CC1Args.push_back("-emit-llvm-bc");
}
llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs(
const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const {
DerivedArgList *DAL =
HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
if (!DAL)
DAL = new DerivedArgList(Args.getBaseArgs());
const OptTable &Opts = getDriver().getOpts();
if (DeviceOffloadKind != Action::OFK_OpenMP) {
for (Arg *A : Args) {
DAL->append(A);
}
}
if (!BoundArch.empty()) {
DAL->eraseArg(options::OPT_march_EQ);
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
BoundArch);
}
return DAL;
}
Tool *AMDGPUOpenMPToolChain::buildLinker() const {
assert(getTriple().isAMDGCN());
return new tools::AMDGCN::OpenMPLinker(*this);
}
void AMDGPUOpenMPToolChain::addClangWarningOptions(
ArgStringList &CC1Args) const {
HostTC.addClangWarningOptions(CC1Args);
}
ToolChain::CXXStdlibType
AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const {
return HostTC.GetCXXStdlibType(Args);
}
void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
const ArgList &DriverArgs, ArgStringList &CC1Args) const {
HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
}
void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
ArgStringList &CC1Args) const {
HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
}
SanitizerMask AMDGPUOpenMPToolChain::getSupportedSanitizers() const {
// The AMDGPUOpenMPToolChain only supports sanitizers in the sense that it
// allows sanitizer arguments on the command line if they are supported by the
// host toolchain. The AMDGPUOpenMPToolChain will actually ignore any command
// line arguments for any of these "supported" sanitizers. That means that no
// sanitization of device code is actually supported at this time.
//
// This behavior is necessary because the host and device toolchains
// invocations often share the command line, so the device toolchain must
// tolerate flags meant only for the host toolchain.
return HostTC.getSupportedSanitizers();
}
VersionTuple
AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D,
const ArgList &Args) const {
return HostTC.computeMSVCVersion(D, Args);
}

View File

@@ -0,0 +1,106 @@
//===- AMDGPUOpenMP.h - AMDGPUOpenMP ToolChain Implementation -*- C++ -*---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPUOPENMP_H
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPUOPENMP_H
#include "AMDGPU.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
namespace clang {
namespace driver {
namespace tools {
namespace AMDGCN {
// Runs llvm-link/opt/llc/lld, which links multiple LLVM bitcode, together with
// device library, then compiles it to ISA in a shared object.
class LLVM_LIBRARY_VISIBILITY OpenMPLinker : public Tool {
public:
OpenMPLinker(const ToolChain &TC)
: Tool("AMDGCN::OpenMPLinker", "amdgcn-link", TC) {}
bool hasIntegratedCPP() const override { return false; }
void ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output, const InputInfoList &Inputs,
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const override;
private:
/// \return llvm-link output file name.
const char *constructLLVMLinkCommand(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs,
const llvm::opt::ArgList &Args,
llvm::StringRef SubArchName,
llvm::StringRef OutputFilePrefix) const;
/// \return llc output file name.
const char *constructLlcCommand(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs,
const llvm::opt::ArgList &Args,
llvm::StringRef SubArchName,
llvm::StringRef OutputFilePrefix,
const char *InputFileName,
bool OutputIsAsm = false) const;
void constructLldCommand(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs, const InputInfo &Output,
const llvm::opt::ArgList &Args,
const char *InputFileName) const;
};
} // end namespace AMDGCN
} // end namespace tools
namespace toolchains {
class LLVM_LIBRARY_VISIBILITY AMDGPUOpenMPToolChain final
: public ROCMToolChain {
public:
AMDGPUOpenMPToolChain(const Driver &D, const llvm::Triple &Triple,
const ToolChain &HostTC,
const llvm::opt::ArgList &Args);
const llvm::Triple *getAuxTriple() const override {
return &HostTC.getTriple();
}
llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const override;
void
addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadKind) const override;
void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override;
CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
void
AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
SanitizerMask getSupportedSanitizers() const override;
VersionTuple
computeMSVCVersion(const Driver *D,
const llvm::opt::ArgList &Args) const override;
const ToolChain &HostTC;
protected:
Tool *buildLinker() const override;
};
} // end namespace toolchains
} // end namespace driver
} // end namespace clang
#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPUOPENMP_H

View File

@@ -71,15 +71,6 @@ public:
void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadKind) const override;
bool useIntegratedAs() const override { return true; }
bool isCrossCompiling() const override { return true; }
bool isPICDefault() const override { return false; }
bool isPIEDefault() const override { return false; }
bool isPICDefaultForced() const override { return false; }
bool SupportsProfiling() const override { return false; }
bool IsMathErrnoDefault() const override { return false; }
void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override;
CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
void

View File

@@ -0,0 +1,36 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
// RUN: | FileCheck %s
// verify the tools invocations
// CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "c"{{.*}}
// CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"{{.*}}
// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-emit-llvm-bc"{{.*}}
// CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
// CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
// CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
// CHECK: clang-offload-wrapper{{.*}}"-target" "x86_64-unknown-linux-gnu" "-o" "{{.*}}a-{{.*}}.bc" {{.*}}amdgpu-openmp-toolchain-{{.*}}.out"
// CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-o" "{{.*}}a-{{.*}}.o" "-x" "ir" "{{.*}}a-{{.*}}.bc"
// CHECK: ld{{.*}}"-o" "a.out"{{.*}}"{{.*}}amdgpu-openmp-toolchain-{{.*}}.o" "{{.*}}a-{{.*}}.o" "-lomp" "-lomptarget"
// RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
// RUN: | FileCheck --check-prefix=CHECK-PHASES %s
// phases
// CHECK-PHASES: 0: input, "{{.*}}amdgpu-openmp-toolchain.c", c, (host-openmp)
// CHECK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp)
// CHECK-PHASES: 2: compiler, {1}, ir, (host-openmp)
// CHECK-PHASES: 3: backend, {2}, assembler, (host-openmp)
// CHECK-PHASES: 4: assembler, {3}, object, (host-openmp)
// CHECK-PHASES: 5: input, "{{.*}}amdgpu-openmp-toolchain.c", c, (device-openmp)
// CHECK-PHASES: 6: preprocessor, {5}, cpp-output, (device-openmp)
// CHECK-PHASES: 7: compiler, {6}, ir, (device-openmp)
// CHECK-PHASES: 8: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (amdgcn-amd-amdhsa)" {7}, ir
// CHECK-PHASES: 9: backend, {8}, assembler, (device-openmp)
// CHECK-PHASES: 10: assembler, {9}, object, (device-openmp)
// CHECK-PHASES: 11: linker, {10}, image, (device-openmp)
// CHECK-PHASES: 12: offload, "device-openmp (amdgcn-amd-amdhsa)" {11}, image
// CHECK-PHASES: 13: clang-offload-wrapper, {12}, ir, (host-openmp)
// CHECK-PHASES: 14: backend, {13}, assembler, (host-openmp)
// CHECK-PHASES: 15: assembler, {14}, object, (host-openmp)
// CHECK-PHASES: 16: linker, {4, 15}, image, (host-openmp)