[PGO][Offload] Allow PGO flags to be used on GPU targets (#94268)
This pull request is the third part of an ongoing effort to extends PGO instrumentation to GPU device code and depends on https://github.com/llvm/llvm-project/pull/93365. This PR makes the following changes: - Allows PGO flags to be supplied to GPU targets - Pulls version global from device - Modifies `__llvm_write_custom_profile` and `lprofWriteDataImpl` to allow the PGO version to be overridden
This commit is contained in:
committed by
GitHub
parent
de2a451058
commit
c50d39f073
@@ -1357,6 +1357,9 @@ void CodeGenPGO::setProfileVersion(llvm::Module &M) {
|
||||
|
||||
IRLevelVersionVariable->setVisibility(llvm::GlobalValue::HiddenVisibility);
|
||||
llvm::Triple TT(M.getTargetTriple());
|
||||
if (TT.isAMDGPU() || TT.isNVPTX())
|
||||
IRLevelVersionVariable->setVisibility(
|
||||
llvm::GlobalValue::ProtectedVisibility);
|
||||
if (TT.supportsCOMDAT()) {
|
||||
IRLevelVersionVariable->setLinkage(llvm::GlobalValue::ExternalLinkage);
|
||||
IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
|
||||
|
||||
@@ -6388,11 +6388,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
|
||||
Args.AddLastArg(CmdArgs, options::OPT_fconvergent_functions,
|
||||
options::OPT_fno_convergent_functions);
|
||||
|
||||
// NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support
|
||||
// for sampling, overhead of call arc collection is way too high and there's
|
||||
// no way to collect the output.
|
||||
if (!Triple.isNVPTX() && !Triple.isAMDGCN())
|
||||
addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs);
|
||||
addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs);
|
||||
|
||||
Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ);
|
||||
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
// Check that profiling/coverage arguments doen't get passed down to device-side
|
||||
// compilation.
|
||||
//
|
||||
//
|
||||
// XRUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \
|
||||
// XRUN: -fprofile-generate %s 2>&1 | \
|
||||
// XRUN: FileCheck --check-prefixes=CHECK,PROF %s
|
||||
//
|
||||
// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \
|
||||
// RUN: -fprofile-instr-generate %s 2>&1 | \
|
||||
// RUN: FileCheck --check-prefixes=CHECK,PROF %s
|
||||
//
|
||||
// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \
|
||||
// RUN: -coverage %s 2>&1 | \
|
||||
// RUN: FileCheck --check-prefixes=CHECK,GCOV %s
|
||||
//
|
||||
// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \
|
||||
// RUN: -ftest-coverage %s 2>&1 | \
|
||||
// RUN: FileCheck --check-prefixes=CHECK,GCOV %s
|
||||
//
|
||||
// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \
|
||||
// RUN: -fprofile-instr-generate -fcoverage-mapping %s 2>&1 | \
|
||||
// RUN: FileCheck --check-prefixes=CHECK,PROF %s
|
||||
//
|
||||
//
|
||||
// CHECK-NOT: error: unsupported option '-fprofile
|
||||
// CHECK-NOT: error: invalid argument
|
||||
// CHECK-DAG: "-fcuda-is-device"
|
||||
// CHECK-NOT: "-f{{[^"/]*coverage.*}}"
|
||||
// CHECK-NOT: "-fprofile{{[^"]*}}"
|
||||
// CHECK: "-triple" "x86_64-unknown-linux-gnu"
|
||||
// PROF: "-fprofile{{.*}}"
|
||||
// GCOV: "-coverage-notes-file=
|
||||
@@ -310,7 +310,8 @@ int __llvm_write_custom_profile(const char *Target,
|
||||
const __llvm_profile_data *DataEnd,
|
||||
const char *CountersBegin,
|
||||
const char *CountersEnd, const char *NamesBegin,
|
||||
const char *NamesEnd);
|
||||
const char *NamesEnd,
|
||||
const uint64_t *VersionOverride);
|
||||
|
||||
/*!
|
||||
* This variable is defined in InstrProfilingRuntime.cpp as a hidden
|
||||
|
||||
@@ -252,5 +252,6 @@ COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer_internal(
|
||||
&BufferWriter, DataBegin, DataEnd, CountersBegin, CountersEnd,
|
||||
BitmapBegin, BitmapEnd, /*VPDataReader=*/0, NamesBegin, NamesEnd,
|
||||
/*VTableBegin=*/NULL, /*VTableEnd=*/NULL, /*VNamesBegin=*/NULL,
|
||||
/*VNamesEnd=*/NULL, /*SkipNameDataWrite=*/0);
|
||||
/*VNamesEnd=*/NULL, /*SkipNameDataWrite=*/0,
|
||||
__llvm_profile_get_version());
|
||||
}
|
||||
|
||||
@@ -1273,10 +1273,13 @@ COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File,
|
||||
return 0;
|
||||
}
|
||||
|
||||
COMPILER_RT_USED int __llvm_write_custom_profile(
|
||||
const char *Target, const __llvm_profile_data *DataBegin,
|
||||
const __llvm_profile_data *DataEnd, const char *CountersBegin,
|
||||
const char *CountersEnd, const char *NamesBegin, const char *NamesEnd) {
|
||||
int __llvm_write_custom_profile(const char *Target,
|
||||
const __llvm_profile_data *DataBegin,
|
||||
const __llvm_profile_data *DataEnd,
|
||||
const char *CountersBegin,
|
||||
const char *CountersEnd, const char *NamesBegin,
|
||||
const char *NamesEnd,
|
||||
const uint64_t *VersionOverride) {
|
||||
int ReturnValue = 0, FilenameLength, TargetLength;
|
||||
char *FilenameBuf, *TargetFilename;
|
||||
const char *Filename;
|
||||
@@ -1358,10 +1361,15 @@ COMPILER_RT_USED int __llvm_write_custom_profile(
|
||||
ProfDataWriter fileWriter;
|
||||
initFileWriter(&fileWriter, OutputFile);
|
||||
|
||||
uint64_t Version = __llvm_profile_get_version();
|
||||
if (VersionOverride)
|
||||
Version = *VersionOverride;
|
||||
|
||||
/* Write custom data to the file */
|
||||
ReturnValue = lprofWriteDataImpl(
|
||||
&fileWriter, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL,
|
||||
lprofGetVPDataReader(), NULL, NULL, NULL, NULL, NamesBegin, NamesEnd, 0);
|
||||
ReturnValue =
|
||||
lprofWriteDataImpl(&fileWriter, DataBegin, DataEnd, CountersBegin,
|
||||
CountersEnd, NULL, NULL, lprofGetVPDataReader(), NULL,
|
||||
NULL, NULL, NULL, NamesBegin, NamesEnd, 0, Version);
|
||||
closeFileObject(OutputFile);
|
||||
|
||||
// Restore SIGKILL.
|
||||
|
||||
@@ -160,7 +160,8 @@ int lprofWriteDataImpl(ProfDataWriter *Writer,
|
||||
VPDataReaderType *VPDataReader, const char *NamesBegin,
|
||||
const char *NamesEnd, const VTableProfData *VTableBegin,
|
||||
const VTableProfData *VTableEnd, const char *VNamesBegin,
|
||||
const char *VNamesEnd, int SkipNameDataWrite);
|
||||
const char *VNamesEnd, int SkipNameDataWrite,
|
||||
uint64_t Version);
|
||||
|
||||
/* Merge value profile data pointed to by SrcValueProfData into
|
||||
* in-memory profile counters pointed by to DstData. */
|
||||
|
||||
@@ -254,21 +254,21 @@ COMPILER_RT_VISIBILITY int lprofWriteData(ProfDataWriter *Writer,
|
||||
const VTableProfData *VTableEnd = __llvm_profile_end_vtables();
|
||||
const char *VNamesBegin = __llvm_profile_begin_vtabnames();
|
||||
const char *VNamesEnd = __llvm_profile_end_vtabnames();
|
||||
uint64_t Version = __llvm_profile_get_version();
|
||||
return lprofWriteDataImpl(Writer, DataBegin, DataEnd, CountersBegin,
|
||||
CountersEnd, BitmapBegin, BitmapEnd, VPDataReader,
|
||||
NamesBegin, NamesEnd, VTableBegin, VTableEnd,
|
||||
VNamesBegin, VNamesEnd, SkipNameDataWrite);
|
||||
VNamesBegin, VNamesEnd, SkipNameDataWrite, Version);
|
||||
}
|
||||
|
||||
COMPILER_RT_VISIBILITY int
|
||||
lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
|
||||
const __llvm_profile_data *DataEnd,
|
||||
const char *CountersBegin, const char *CountersEnd,
|
||||
const char *BitmapBegin, const char *BitmapEnd,
|
||||
VPDataReaderType *VPDataReader, const char *NamesBegin,
|
||||
const char *NamesEnd, const VTableProfData *VTableBegin,
|
||||
const VTableProfData *VTableEnd, const char *VNamesBegin,
|
||||
const char *VNamesEnd, int SkipNameDataWrite) {
|
||||
COMPILER_RT_VISIBILITY int lprofWriteDataImpl(
|
||||
ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
|
||||
const __llvm_profile_data *DataEnd, const char *CountersBegin,
|
||||
const char *CountersEnd, const char *BitmapBegin, const char *BitmapEnd,
|
||||
VPDataReaderType *VPDataReader, const char *NamesBegin,
|
||||
const char *NamesEnd, const VTableProfData *VTableBegin,
|
||||
const VTableProfData *VTableEnd, const char *VNamesBegin,
|
||||
const char *VNamesEnd, int SkipNameDataWrite, uint64_t Version) {
|
||||
/* Calculate size of sections. */
|
||||
const uint64_t DataSectionSize =
|
||||
__llvm_profile_get_data_size(DataBegin, DataEnd);
|
||||
@@ -308,6 +308,7 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
|
||||
#define INSTR_PROF_RAW_HEADER(Type, Name, Init) Header.Name = Init;
|
||||
#include "profile/InstrProfData.inc"
|
||||
}
|
||||
Header.Version = Version;
|
||||
|
||||
/* On WIN64, label differences are truncated 32-bit values. Truncate
|
||||
* CountersDelta to match. */
|
||||
|
||||
@@ -469,6 +469,10 @@ createIRLevelProfileFlagVar(Module &M,
|
||||
M, IntTy64, true, GlobalValue::WeakAnyLinkage,
|
||||
Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
|
||||
IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
|
||||
if (isGPUProfTarget(M))
|
||||
IRLevelVersionVariable->setVisibility(
|
||||
llvm::GlobalValue::ProtectedVisibility);
|
||||
|
||||
Triple TT(M.getTargetTriple());
|
||||
if (TT.supportsCOMDAT()) {
|
||||
IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// Header
|
||||
//
|
||||
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
|
||||
// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
|
||||
// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
|
||||
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
|
||||
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
|
||||
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
|
||||
|
||||
@@ -67,15 +67,16 @@ extern "C" {
|
||||
extern int __attribute__((weak)) __llvm_write_custom_profile(
|
||||
const char *Target, const __llvm_profile_data *DataBegin,
|
||||
const __llvm_profile_data *DataEnd, const char *CountersBegin,
|
||||
const char *CountersEnd, const char *NamesBegin, const char *NamesEnd);
|
||||
const char *CountersEnd, const char *NamesBegin, const char *NamesEnd,
|
||||
const uint64_t *VersionOverride);
|
||||
}
|
||||
|
||||
/// PGO profiling data extracted from a GPU device
|
||||
struct GPUProfGlobals {
|
||||
SmallVector<int64_t> Counts;
|
||||
SmallVector<__llvm_profile_data> Data;
|
||||
SmallVector<uint8_t> NamesData;
|
||||
Triple TargetTriple;
|
||||
uint64_t Version = INSTR_PROF_RAW_VERSION;
|
||||
|
||||
void dump() const;
|
||||
Error write() const;
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
#include "Shared/Utils.h"
|
||||
|
||||
#include "llvm/ProfileData/InstrProfData.inc"
|
||||
#include "llvm/Support/Error.h"
|
||||
|
||||
#include <cstring>
|
||||
@@ -214,6 +215,13 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
|
||||
if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal))
|
||||
return Err;
|
||||
DeviceProfileData.Data.push_back(std::move(Data));
|
||||
} else if (*NameOrErr == INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)) {
|
||||
uint64_t RawVersionData;
|
||||
GlobalTy RawVersionGlobal(NameOrErr->str(), Sym.getSize(),
|
||||
&RawVersionData);
|
||||
if (auto Err = readGlobalFromDevice(Device, Image, RawVersionGlobal))
|
||||
return Err;
|
||||
DeviceProfileData.Version = RawVersionData;
|
||||
}
|
||||
}
|
||||
return DeviceProfileData;
|
||||
@@ -295,9 +303,9 @@ Error GPUProfGlobals::write() const {
|
||||
memcpy(NamesBegin, NamesData.data(), NamesData.size());
|
||||
|
||||
// Invoke compiler-rt entrypoint
|
||||
int result = __llvm_write_custom_profile(TargetTriple.str().c_str(),
|
||||
DataBegin, DataEnd, CountersBegin,
|
||||
CountersEnd, NamesBegin, NamesEnd);
|
||||
int result = __llvm_write_custom_profile(
|
||||
TargetTriple.str().c_str(), DataBegin, DataEnd, CountersBegin,
|
||||
CountersEnd, NamesBegin, NamesEnd, &Version);
|
||||
if (result != 0)
|
||||
return Plugin::error("Error writing GPU PGO data to file");
|
||||
|
||||
|
||||
84
offload/test/offloading/gpupgo/pgo1.c
Normal file
84
offload/test/offloading/gpupgo/pgo1.c
Normal file
@@ -0,0 +1,84 @@
|
||||
// RUN: %libomptarget-compile-generic -fcreate-profile \
|
||||
// RUN: -Xarch_device -fprofile-generate
|
||||
// RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \
|
||||
// RUN: %libomptarget-run-generic 2>&1
|
||||
// RUN: llvm-profdata show --all-functions --counts \
|
||||
// RUN: %target_triple.%basename_t.llvm.profraw | \
|
||||
// RUN: %fcheck-generic --check-prefix="LLVM-PGO"
|
||||
|
||||
// RUN: %libomptarget-compile-generic -fcreate-profile \
|
||||
// RUN: -Xarch_device -fprofile-instr-generate
|
||||
// RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \
|
||||
// RUN: %libomptarget-run-generic 2>&1
|
||||
// RUN: llvm-profdata show --all-functions --counts \
|
||||
// RUN: %target_triple.%basename_t.clang.profraw | \
|
||||
// RUN: %fcheck-generic --check-prefix="CLANG-PGO"
|
||||
|
||||
// REQUIRES: gpu
|
||||
// REQUIRES: pgo
|
||||
|
||||
int test1(int a) { return a / 2; }
|
||||
int test2(int a) { return a * 2; }
|
||||
|
||||
int main() {
|
||||
int m = 2;
|
||||
#pragma omp target
|
||||
for (int i = 0; i < 10; i++) {
|
||||
m = test1(m);
|
||||
for (int j = 0; j < 2; j++) {
|
||||
m = test2(m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
|
||||
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// LLVM-PGO: Counters: 4
|
||||
// LLVM-PGO: Block counts: [20, 10, 2, 1]
|
||||
|
||||
// LLVM-PGO-LABEL: test1:
|
||||
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// LLVM-PGO: Counters: 1
|
||||
// LLVM-PGO: Block counts: [10]
|
||||
|
||||
// LLVM-PGO-LABEL: test2:
|
||||
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// LLVM-PGO: Counters: 1
|
||||
// LLVM-PGO: Block counts: [20]
|
||||
|
||||
// LLVM-PGO-LABEL: Instrumentation level:
|
||||
// LLVM-PGO-SAME: IR
|
||||
// LLVM-PGO-SAME: entry_first = 0
|
||||
// LLVM-PGO-LABEL: Functions shown:
|
||||
// LLVM-PGO-SAME: 3
|
||||
// LLVM-PGO-LABEL: Maximum function count:
|
||||
// LLVM-PGO-SAME: 20
|
||||
// LLVM-PGO-LABEL: Maximum internal block count:
|
||||
// LLVM-PGO-SAME: 10
|
||||
|
||||
// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
|
||||
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// CLANG-PGO: Counters: 3
|
||||
// CLANG-PGO: Function count: 0
|
||||
// CLANG-PGO: Block counts: [11, 20]
|
||||
|
||||
// CLANG-PGO-LABEL: test1:
|
||||
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// CLANG-PGO: Counters: 1
|
||||
// CLANG-PGO: Function count: 10
|
||||
// CLANG-PGO: Block counts: []
|
||||
|
||||
// CLANG-PGO-LABEL: test2:
|
||||
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// CLANG-PGO: Counters: 1
|
||||
// CLANG-PGO: Function count: 20
|
||||
// CLANG-PGO: Block counts: []
|
||||
|
||||
// CLANG-PGO-LABEL: Instrumentation level:
|
||||
// CLANG-PGO-SAME: Front-end
|
||||
// CLANG-PGO-LABEL: Functions shown:
|
||||
// CLANG-PGO-SAME: 3
|
||||
// CLANG-PGO-LABEL: Maximum function count:
|
||||
// CLANG-PGO-SAME: 20
|
||||
// CLANG-PGO-LABEL: Maximum internal block count:
|
||||
// CLANG-PGO-SAME: 20
|
||||
102
offload/test/offloading/gpupgo/pgo2.c
Normal file
102
offload/test/offloading/gpupgo/pgo2.c
Normal file
@@ -0,0 +1,102 @@
|
||||
// RUN: %libomptarget-compile-generic -fprofile-generate
|
||||
// RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \
|
||||
// RUN: %libomptarget-run-generic 2>&1
|
||||
// RUN: llvm-profdata show --all-functions --counts \
|
||||
// RUN: %basename_t.llvm.profraw | %fcheck-generic \
|
||||
// RUN: --check-prefix="LLVM-HOST"
|
||||
// RUN: llvm-profdata show --all-functions --counts \
|
||||
// RUN: %target_triple.%basename_t.llvm.profraw \
|
||||
// RUN: | %fcheck-generic --check-prefix="LLVM-DEVICE"
|
||||
|
||||
// RUN: %libomptarget-compile-generic -fprofile-instr-generate
|
||||
// RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \
|
||||
// RUN: %libomptarget-run-generic 2>&1
|
||||
// RUN: llvm-profdata show --all-functions --counts \
|
||||
// RUN: %basename_t.clang.profraw | %fcheck-generic \
|
||||
// RUN: --check-prefix="CLANG-HOST"
|
||||
// RUN: llvm-profdata show --all-functions --counts \
|
||||
// RUN: %target_triple.%basename_t.clang.profraw | \
|
||||
// RUN: %fcheck-generic --check-prefix="CLANG-DEV"
|
||||
|
||||
// RUN: %libomptarget-compile-generic -Xarch_host -fprofile-generate
|
||||
// RUN: env LLVM_PROFILE_FILE=%basename_t.nogpu.profraw \
|
||||
// RUN: %libomptarget-run-generic 2>&1
|
||||
// RUN: llvm-profdata show --all-functions --counts \
|
||||
// RUN: %basename_t.nogpu.profraw | %fcheck-generic \
|
||||
// RUN: --check-prefix="LLVM-HOST"
|
||||
// RUN: not test -e %target_triple.%basename_t.nogpu.profraw
|
||||
|
||||
// RUN: %libomptarget-compile-generic -Xarch_host -fprofile-generate \
|
||||
// RUN: -Xarch_device -fprofile-instr-generate
|
||||
// RUN: env LLVM_PROFILE_FILE=%basename_t.hidf.profraw \
|
||||
// RUN: %libomptarget-run-generic 2>&1
|
||||
// RUN: llvm-profdata show --all-functions --counts \
|
||||
// RUN: %basename_t.hidf.profraw | %fcheck-generic \
|
||||
// RUN: --check-prefix="LLVM-HOST"
|
||||
// RUN: llvm-profdata show --all-functions --counts \
|
||||
// RUN: %target_triple.%basename_t.hidf.profraw \
|
||||
// RUN: | %fcheck-generic --check-prefix="CLANG-DEV"
|
||||
|
||||
// RUN: %libomptarget-compile-generic -Xarch_device -fprofile-generate \
|
||||
// RUN: -Xarch_host -fprofile-instr-generate
|
||||
// RUN: env LLVM_PROFILE_FILE=%basename_t.hfdi.profraw \
|
||||
// RUN: %libomptarget-run-generic 2>&1
|
||||
// RUN: llvm-profdata show --all-functions --counts \
|
||||
// RUN: %basename_t.hfdi.profraw | %fcheck-generic \
|
||||
// RUN: --check-prefix="CLANG-HOST"
|
||||
// RUN: llvm-profdata show --all-functions --counts \
|
||||
// RUN: %target_triple.%basename_t.hfdi.profraw \
|
||||
// RUN: | %fcheck-generic --check-prefix="LLVM-DEVICE"
|
||||
|
||||
// REQUIRES: gpu
|
||||
// REQUIRES: pgo
|
||||
|
||||
int main() {
|
||||
int host_var = 0;
|
||||
for (int i = 0; i < 20; i++) {
|
||||
host_var += i;
|
||||
}
|
||||
|
||||
int device_var = 1;
|
||||
#pragma omp target
|
||||
for (int i = 0; i < 10; i++) {
|
||||
device_var *= i;
|
||||
}
|
||||
}
|
||||
|
||||
// LLVM-HOST-LABEL: main:
|
||||
// LLVM-HOST: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// LLVM-HOST: Counters: 3
|
||||
// LLVM-HOST: Block counts: [20, 1, 0]
|
||||
|
||||
// LLVM-HOST-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
|
||||
// LLVM-HOST: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// LLVM-HOST: Counters: 2
|
||||
// LLVM-HOST: Block counts: [0, 0]
|
||||
// LLVM-HOST: Instrumentation level: IR
|
||||
|
||||
// LLVM-DEVICE-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
|
||||
// LLVM-DEVICE: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// LLVM-DEVICE: Counters: 3
|
||||
// LLVM-DEVICE: Block counts: [10, 2, 1]
|
||||
// LLVM-DEVICE: Instrumentation level: IR
|
||||
|
||||
// CLANG-HOST-LABEL: main:
|
||||
// CLANG-HOST: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// CLANG-HOST: Counters: 2
|
||||
// CLANG-HOST: Function count: 1
|
||||
// CLANG-HOST: Block counts: [20]
|
||||
|
||||
// CLANG-HOST-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
|
||||
// CLANG-HOST: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// CLANG-HOST: Counters: 2
|
||||
// CLANG-HOST: Function count: 0
|
||||
// CLANG-HOST: Block counts: [0]
|
||||
// CLANG-HOST: Instrumentation level: Front-end
|
||||
|
||||
// CLANG-DEV-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
|
||||
// CLANG-DEV: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// CLANG-DEV: Counters: 2
|
||||
// CLANG-DEV: Function count: 0
|
||||
// CLANG-DEV: Block counts: [11]
|
||||
// CLANG-DEV: Instrumentation level: Front-end
|
||||
@@ -1,66 +0,0 @@
|
||||
// RUN: %libomptarget-compile-generic -fprofile-generate \
|
||||
// RUN: -Xclang "-fprofile-instrument=llvm"
|
||||
// RUN: env LLVM_PROFILE_FILE=llvm.profraw %libomptarget-run-generic 2>&1
|
||||
// RUN: %profdata show --all-functions --counts \
|
||||
// RUN: %target_triple.llvm.profraw | %fcheck-generic \
|
||||
// RUN: --check-prefix="LLVM-PGO"
|
||||
|
||||
// RUN: %libomptarget-compile-generic -fprofile-instr-generate \
|
||||
// RUN: -Xclang "-fprofile-instrument=clang"
|
||||
// RUN: env LLVM_PROFILE_FILE=clang.profraw %libomptarget-run-generic 2>&1
|
||||
// RUN: %profdata show --all-functions --counts \
|
||||
// RUN: %target_triple.clang.profraw | %fcheck-generic \
|
||||
// RUN: --check-prefix="CLANG-PGO"
|
||||
|
||||
// REQUIRES: gpu
|
||||
// REQUIRES: pgo
|
||||
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
int test1(int a) { return a / 2; }
|
||||
int test2(int a) { return a * 2; }
|
||||
|
||||
int main() {
|
||||
int m = 2;
|
||||
#pragma omp target
|
||||
for (int i = 0; i < 10; i++) {
|
||||
m = test1(m);
|
||||
for (int j = 0; j < 2; j++) {
|
||||
m = test2(m);
|
||||
}
|
||||
}
|
||||
}
|
||||
// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
|
||||
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// LLVM-PGO: Counters: 4
|
||||
// LLVM-PGO: Block counts: [20, 10, 2, 1]
|
||||
|
||||
// LLVM-PGO-LABEL: test1:
|
||||
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// LLVM-PGO: Counters: 1
|
||||
// LLVM-PGO: Block counts: [10]
|
||||
|
||||
// LLVM-PGO-LABEL: test2:
|
||||
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// LLVM-PGO: Counters: 1
|
||||
// LLVM-PGO: Block counts: [20]
|
||||
|
||||
// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
|
||||
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// CLANG-PGO: Counters: 3
|
||||
// CLANG-PGO: Function count: 0
|
||||
// CLANG-PGO: Block counts: [11, 20]
|
||||
|
||||
// CLANG-PGO-LABEL: test1:
|
||||
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// CLANG-PGO: Counters: 1
|
||||
// CLANG-PGO: Function count: 10
|
||||
// CLANG-PGO: Block counts: []
|
||||
|
||||
// CLANG-PGO-LABEL: test2:
|
||||
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
|
||||
// CLANG-PGO: Counters: 1
|
||||
// CLANG-PGO: Function count: 20
|
||||
// CLANG-PGO: Block counts: []
|
||||
Reference in New Issue
Block a user