[PGO][Offload] Fix offload coverage mapping (#143490)

This pull request fixes coverage mapping on GPU targets. 

- It adds an address space cast to the coverage mapping generation pass.
- It reads the profiled function names from the ELF directly. Reading them
from public globals was causing issues in cases where multiple
device-code object files are linked together.
This commit is contained in:
Ethan Luis McDonough
2025-06-10 20:19:38 -05:00
committed by GitHub
parent 3cef099ced
commit 67ff66e677
5 changed files with 22 additions and 31 deletions

View File

@@ -2622,8 +2622,9 @@ void CoverageMappingModuleGen::emit() {
CGM.addUsedGlobal(CovData);
// Create the deferred function records array
if (!FunctionNames.empty()) {
auto NamesArrTy = llvm::ArrayType::get(llvm::PointerType::getUnqual(Ctx),
FunctionNames.size());
auto AddrSpace = FunctionNames.front()->getType()->getPointerAddressSpace();
auto NamesArrTy = llvm::ArrayType::get(
llvm::PointerType::get(Ctx, AddrSpace), FunctionNames.size());
auto NamesArrVal = llvm::ConstantArray::get(NamesArrTy, FunctionNames);
// This variable will *NOT* be emitted to the object file. It is used
// to pass the list of names referenced to codegen.

View File

@@ -1955,12 +1955,6 @@ void InstrLowerer::emitNameData() {
GlobalValue::PrivateLinkage, NamesVal,
getInstrProfNamesVarName());
// Make names variable public if current target is a GPU
if (isGPUProfTarget(M)) {
NamesVar->setLinkage(GlobalValue::ExternalLinkage);
NamesVar->setVisibility(GlobalValue::VisibilityTypes::ProtectedVisibility);
}
NamesSize = CompressedNameStr.size();
setGlobalVariableLargeSection(TT, *NamesVar);
NamesVar->setSection(

View File

@@ -80,6 +80,7 @@ struct GPUProfGlobals {
void dump() const;
Error write() const;
bool empty() const;
};
/// Subclass of GlobalTy that holds the memory for a global of \p Ty.
@@ -192,9 +193,6 @@ public:
/*D2H=*/false);
}
/// Checks whether a given image contains profiling globals.
bool hasProfilingGlobals(GenericDeviceTy &Device, DeviceImageTy &Image);
/// Reads profiling data from a GPU image to supplied profdata struct.
/// Iterates through the image symbol table and stores global values
/// with profiling prefixes.

View File

@@ -173,16 +173,6 @@ Error GenericGlobalHandlerTy::readGlobalFromImage(GenericDeviceTy &Device,
return Plugin::success();
}
/// Checks whether \p Image contains profiling globals by looking up the
/// metadata of the global named by getInstrProfNamesVarName().
/// \returns true if the metadata lookup succeeds, false otherwise.
bool GenericGlobalHandlerTy::hasProfilingGlobals(GenericDeviceTy &Device,
DeviceImageTy &Image) {
// Zero-sized placeholder: only the lookup result is used, no data is read.
GlobalTy global(getInstrProfNamesVarName().str(), 0);
if (auto Err = getGlobalMetadataFromImage(Device, Image, global)) {
// A failed lookup is reported as "no profiling globals" rather than
// propagated, so the error is explicitly consumed here.
consumeError(std::move(Err));
return false;
}
return true;
}
Expected<GPUProfGlobals>
GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
DeviceImageTy &Image) {
@@ -204,12 +194,17 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
// Check if given current global is a profiling global based
// on name
if (*NameOrErr == getInstrProfNamesVarName()) {
// Read in profiled function names
DeviceProfileData.NamesData = SmallVector<uint8_t>(Sym.getSize(), 0);
GlobalTy NamesGlobal(NameOrErr->str(), Sym.getSize(),
DeviceProfileData.NamesData.data());
if (auto Err = readGlobalFromDevice(Device, Image, NamesGlobal))
return Err;
// Read in profiled function names from ELF
auto SectionOrErr = Sym.getSection();
if (!SectionOrErr)
return SectionOrErr.takeError();
auto ContentsOrErr = (*SectionOrErr)->getContents();
if (!ContentsOrErr)
return ContentsOrErr.takeError();
SmallVector<uint8_t> NameBytes(ContentsOrErr->bytes());
DeviceProfileData.NamesData = NameBytes;
} else if (NameOrErr->starts_with(getInstrProfCountersVarPrefix())) {
// Read global variable profiling counts
SmallVector<int64_t> Counts(Sym.getSize() / sizeof(int64_t), 0);
@@ -322,3 +317,7 @@ Error GPUProfGlobals::write() const {
return Plugin::success();
}
/// Returns true when Counts, Data, and NamesData are all empty.
bool GPUProfGlobals::empty() const {
  const bool HasContents =
      !Counts.empty() || !Data.empty() || !NamesData.empty();
  return !HasContents;
}

View File

@@ -858,14 +858,13 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
for (auto *Image : LoadedImages) {
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
if (!Handler.hasProfilingGlobals(*this, *Image))
continue;
GPUProfGlobals profdata;
auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
if (!ProfOrErr)
return ProfOrErr.takeError();
if (ProfOrErr->empty())
continue;
// Dump out profdata
if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) ==
uint32_t(DeviceDebugKind::PGODump))