[PGO][Offload] Fix offload coverage mapping (#143490)
This pull request fixes coverage mapping on GPU targets:
- It adds an address space cast to the coverage mapping generation pass.
- It reads the profiled function names directly from the ELF. Reading them from public globals was causing issues in cases where multiple device-code object files are linked together.
This commit is contained in:
committed by
GitHub
parent
3cef099ced
commit
67ff66e677
@@ -80,6 +80,7 @@ struct GPUProfGlobals {
|
||||
|
||||
void dump() const;
|
||||
Error write() const;
|
||||
bool empty() const;
|
||||
};
|
||||
|
||||
/// Subclass of GlobalTy that holds the memory for a global of \p Ty.
|
||||
@@ -192,9 +193,6 @@ public:
|
||||
/*D2H=*/false);
|
||||
}
|
||||
|
||||
/// Checks whether a given image contains profiling globals.
|
||||
bool hasProfilingGlobals(GenericDeviceTy &Device, DeviceImageTy &Image);
|
||||
|
||||
/// Reads profiling data from a GPU image to supplied profdata struct.
|
||||
/// Iterates through the image symbol table and stores global values
|
||||
/// with profiling prefixes.
|
||||
|
||||
@@ -173,16 +173,6 @@ Error GenericGlobalHandlerTy::readGlobalFromImage(GenericDeviceTy &Device,
|
||||
return Plugin::success();
|
||||
}
|
||||
|
||||
bool GenericGlobalHandlerTy::hasProfilingGlobals(GenericDeviceTy &Device,
|
||||
DeviceImageTy &Image) {
|
||||
GlobalTy global(getInstrProfNamesVarName().str(), 0);
|
||||
if (auto Err = getGlobalMetadataFromImage(Device, Image, global)) {
|
||||
consumeError(std::move(Err));
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
Expected<GPUProfGlobals>
|
||||
GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
|
||||
DeviceImageTy &Image) {
|
||||
@@ -204,12 +194,17 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
|
||||
// Check if given current global is a profiling global based
|
||||
// on name
|
||||
if (*NameOrErr == getInstrProfNamesVarName()) {
|
||||
// Read in profiled function names
|
||||
DeviceProfileData.NamesData = SmallVector<uint8_t>(Sym.getSize(), 0);
|
||||
GlobalTy NamesGlobal(NameOrErr->str(), Sym.getSize(),
|
||||
DeviceProfileData.NamesData.data());
|
||||
if (auto Err = readGlobalFromDevice(Device, Image, NamesGlobal))
|
||||
return Err;
|
||||
// Read in profiled function names from ELF
|
||||
auto SectionOrErr = Sym.getSection();
|
||||
if (!SectionOrErr)
|
||||
return SectionOrErr.takeError();
|
||||
|
||||
auto ContentsOrErr = (*SectionOrErr)->getContents();
|
||||
if (!ContentsOrErr)
|
||||
return ContentsOrErr.takeError();
|
||||
|
||||
SmallVector<uint8_t> NameBytes(ContentsOrErr->bytes());
|
||||
DeviceProfileData.NamesData = NameBytes;
|
||||
} else if (NameOrErr->starts_with(getInstrProfCountersVarPrefix())) {
|
||||
// Read global variable profiling counts
|
||||
SmallVector<int64_t> Counts(Sym.getSize() / sizeof(int64_t), 0);
|
||||
@@ -322,3 +317,7 @@ Error GPUProfGlobals::write() const {
|
||||
|
||||
return Plugin::success();
|
||||
}
|
||||
|
||||
bool GPUProfGlobals::empty() const {
|
||||
return Counts.empty() && Data.empty() && NamesData.empty();
|
||||
}
|
||||
|
||||
@@ -858,14 +858,13 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
|
||||
|
||||
for (auto *Image : LoadedImages) {
|
||||
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
|
||||
if (!Handler.hasProfilingGlobals(*this, *Image))
|
||||
continue;
|
||||
|
||||
GPUProfGlobals profdata;
|
||||
auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
|
||||
if (!ProfOrErr)
|
||||
return ProfOrErr.takeError();
|
||||
|
||||
if (ProfOrErr->empty())
|
||||
continue;
|
||||
|
||||
// Dump out profdata
|
||||
if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) ==
|
||||
uint32_t(DeviceDebugKind::PGODump))
|
||||
|
||||
Reference in New Issue
Block a user