Files
clang-p2996/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
alx32 6f28b4b5e9 [GSYM] Add support for querying merged functions in llvm-gsymutil (#120991)
Adds the ability to look up and display all merged functions for an
address in llvm-gsymutil.

Now, when `--merged-functions` is used in combination with
`--address/--addresses-from-stdin`, lookup results will contain
information about merged functions, if available.

To support printing merged function information when using the
`--verbose` option, the `LookupResult` data structure also had to be
extended with pointers to the raw function data and raw merged function
data. This is because merged functions share the same address range, so
it's not easy to look up the raw merged function data for a particular
`LookupResult` that is based on a merged function.
2025-01-06 11:55:27 -08:00

344 lines
13 KiB
C++

//===- FunctionInfo.cpp ---------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/FileWriter.h"
#include "llvm/DebugInfo/GSYM/GsymReader.h"
#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/Support/DataExtractor.h"
#include <optional>
using namespace llvm;
using namespace gsym;
/// FunctionInfo information type that is used to encode the optional data
/// that is associated with a FunctionInfo object.
///
/// FunctionInfo::encode() writes each optional chunk as a uint32_t InfoType
/// tag, a uint32_t byte length and then the chunk payload; the explicit
/// numeric values below are the tags that FunctionInfo::decode() and
/// FunctionInfo::lookup() switch on when reading that data back.
enum InfoType : uint32_t {
/// Terminates the list of chunks; encode() writes it with a zero length.
EndOfList = 0u,
/// Payload is handled by LineTable::decode() / LineTable::lookup().
LineTableInfo = 1u,
/// Payload is handled by InlineInfo::decode() / InlineInfo::lookup().
InlineInfo = 2u,
/// Payload is handled by MergedFunctionsInfo::decode().
MergedFunctionsInfo = 3u,
/// Payload is handled by CallSiteInfoCollection::decode().
CallSiteInfo = 4u,
};
/// Dump a FunctionInfo to \a OS: one line with the address range and the name
/// string table offset in hex, followed by one line for each of the line
/// table, inline info and call site info that is present.
raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) {
  // Header line: "<range>: Name=<hex name offset>".
  OS << FI.Range;
  OS << ": " << "Name=" << HEX32(FI.Name) << '\n';

  // Optional sections, each on its own line and only when populated.
  if (FI.OptLineTable) {
    OS << FI.OptLineTable;
    OS << '\n';
  }
  if (FI.Inline) {
    OS << FI.Inline;
    OS << '\n';
  }
  if (FI.CallSites) {
    OS << *FI.CallSites;
    OS << '\n';
  }
  return OS;
}
/// Decode a complete FunctionInfo from \a Data.
///
/// The encoded form is a uint32_t function size, a uint32_t name string table
/// offset (which must be non-zero), and then a sequence of chunks. Each chunk
/// is a uint32_t InfoType tag, a uint32_t payload length and the payload
/// bytes; the sequence ends at an InfoType::EndOfList chunk.
///
/// \param Data     The bytes to decode; reading starts at offset 0.
/// \param BaseAddr Start address of the function. It becomes the start of the
///                 decoded range and is passed on to the line table, inline
///                 info and merged function decoders.
/// \returns The decoded FunctionInfo, or an error if the data is truncated,
///          the name offset is zero, a chunk has an unsupported InfoType, or
///          a chunk decoder fails.
llvm::Expected<FunctionInfo> FunctionInfo::decode(DataExtractor &Data,
                                                  uint64_t BaseAddr) {
  FunctionInfo Result;
  uint64_t Cursor = 0;

  // Fixed header, field 1: the function size in bytes.
  if (!Data.isValidOffsetForDataOfSize(Cursor, 4))
    return createStringError(std::errc::io_error,
        "0x%8.8" PRIx64 ": missing FunctionInfo Size", Cursor);
  const uint32_t FuncSize = Data.getU32(&Cursor);
  Result.Range = {BaseAddr, BaseAddr + FuncSize};

  // Fixed header, field 2: the name, as a string table offset.
  if (!Data.isValidOffsetForDataOfSize(Cursor, 4))
    return createStringError(std::errc::io_error,
        "0x%8.8" PRIx64 ": missing FunctionInfo Name", Cursor);
  Result.Name = Data.getU32(&Cursor);
  if (Result.Name == 0)
    return createStringError(std::errc::io_error,
        "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x%8.8x",
        Cursor - 4, Result.Name);

  // Walk the chunks. The only way out of this loop is a return: either the
  // EndOfList chunk was reached or an error was hit.
  for (;;) {
    if (!Data.isValidOffsetForDataOfSize(Cursor, 4))
      return createStringError(std::errc::io_error,
          "0x%8.8" PRIx64 ": missing FunctionInfo InfoType value", Cursor);
    const uint32_t Kind = Data.getU32(&Cursor);

    if (!Data.isValidOffsetForDataOfSize(Cursor, 4))
      return createStringError(std::errc::io_error,
          "0x%8.8" PRIx64 ": missing FunctionInfo InfoType length", Cursor);
    const uint32_t PayloadSize = Data.getU32(&Cursor);

    if (!Data.isValidOffsetForDataOfSize(Cursor, PayloadSize))
      return createStringError(std::errc::io_error,
          "0x%8.8" PRIx64 ": missing FunctionInfo data for InfoType %u",
          Cursor, Kind);

    // Give each chunk decoder an extractor limited to its own payload bytes.
    DataExtractor Payload(Data.getData().substr(Cursor, PayloadSize),
                          Data.isLittleEndian(), Data.getAddressSize());

    switch (Kind) {
    case InfoType::EndOfList:
      return std::move(Result);

    case InfoType::LineTableInfo: {
      Expected<LineTable> Table = LineTable::decode(Payload, BaseAddr);
      if (!Table)
        return Table.takeError();
      Result.OptLineTable = std::move(*Table);
      break;
    }

    case InfoType::InlineInfo: {
      Expected<InlineInfo> InlineTree = InlineInfo::decode(Payload, BaseAddr);
      if (!InlineTree)
        return InlineTree.takeError();
      Result.Inline = std::move(*InlineTree);
      break;
    }

    case InfoType::MergedFunctionsInfo: {
      Expected<MergedFunctionsInfo> Merged =
          MergedFunctionsInfo::decode(Payload, BaseAddr);
      if (!Merged)
        return Merged.takeError();
      Result.MergedFunctions = std::move(*Merged);
      break;
    }

    case InfoType::CallSiteInfo: {
      Expected<llvm::gsym::CallSiteInfoCollection> Sites =
          llvm::gsym::CallSiteInfoCollection::decode(Payload);
      if (!Sites)
        return Sites.takeError();
      Result.CallSites = std::move(*Sites);
      break;
    }

    default:
      // Cursor is already past the 8 byte chunk header, so step back to
      // report the offset of the InfoType tag itself.
      return createStringError(std::errc::io_error,
                               "0x%8.8" PRIx64 ": unsupported InfoType %u",
                               Cursor - 8, Kind);
    }

    // Move on to the next chunk header.
    Cursor += PayloadSize;
  }
}
uint64_t FunctionInfo::cacheEncoding() {
EncodingCache.clear();
if (!isValid())
return 0;
raw_svector_ostream OutStrm(EncodingCache);
FileWriter FW(OutStrm, llvm::endianness::native);
llvm::Expected<uint64_t> Result = encode(FW);
if (!Result) {
EncodingCache.clear();
consumeError(Result.takeError());
return 0;
}
return EncodingCache.size();
}
/// Encode this FunctionInfo into \a Out.
///
/// The encoding is a uint32_t function size, a uint32_t name string table
/// offset, then one chunk for each of the line table, inline info, merged
/// functions and call sites that is present (in that order), and finally an
/// InfoType::EndOfList chunk with zero length. Each chunk is a uint32_t
/// InfoType tag, a uint32_t payload length and the payload bytes.
///
/// \param Out       The writer to encode into.
/// \param NoPadding When false, \a Out is first aligned to 4 bytes.
/// \returns The offset in \a Out at which this FunctionInfo starts, or an
///          error if this object is invalid, a chunk encoder fails, or a
///          chunk payload is larger than UINT32_MAX bytes.
llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out,
                                              bool NoPadding) const {
  if (!isValid())
    return createStringError(std::errc::invalid_argument,
        "attempted to encode invalid FunctionInfo object");

  // Align FunctionInfo data to a 4 byte alignment, if padding is allowed.
  if (!NoPadding)
    Out.alignTo(4);
  const uint64_t FuncInfoOffset = Out.tell();

  // EncodingCache is non empty when creating segmented GSYM files, where the
  // exact encoded size of each FunctionInfo has to be computed up front. The
  // cache is only reused when the writer's byte order is the native one.
  const bool CanReuseCache =
      !EncodingCache.empty() && llvm::endianness::native == Out.getByteOrder();
  if (CanReuseCache) {
    const auto *CachedBytes =
        reinterpret_cast<const uint8_t *>(EncodingCache.data());
    Out.writeData(llvm::ArrayRef<uint8_t>(CachedBytes, EncodingCache.size()));
    return FuncInfoOffset;
  }

  // Write the size in bytes of this function as a uint32_t. This can be zero
  // if we just have a symbol from a symbol table and that symbol has no size.
  Out.writeU32(size());
  // Write the name of this function as a uint32_t string table offset.
  Out.writeU32(Name);

  // Emit one chunk: the InfoType tag, a zero placeholder for the length, the
  // payload produced by EncodePayload, and then the real payload length
  // patched over the placeholder. TooLongMsg is the error text used when the
  // payload does not fit in a uint32_t length.
  auto WriteChunk = [&Out](InfoType Tag, const char *TooLongMsg,
                           auto &&EncodePayload) -> llvm::Error {
    Out.writeU32(Tag);
    Out.writeU32(0);
    const uint64_t PayloadStart = Out.tell();
    if (llvm::Error PayloadErr = EncodePayload())
      return PayloadErr;
    const uint64_t PayloadSize = Out.tell() - PayloadStart;
    if (PayloadSize > UINT32_MAX)
      return createStringError(std::errc::invalid_argument, TooLongMsg);
    // The length field is the 4 bytes just before the payload.
    Out.fixup32(static_cast<uint32_t>(PayloadSize), PayloadStart - 4);
    return llvm::Error::success();
  };

  // Line table, encoded relative to the function start address.
  if (OptLineTable) {
    if (llvm::Error E = WriteChunk(
            InfoType::LineTableInfo,
            "LineTable length is greater than UINT32_MAX",
            [&] { return OptLineTable->encode(Out, Range.start()); }))
      return std::move(E);
  }

  // Inline function info, encoded relative to the function start address.
  if (Inline) {
    if (llvm::Error E = WriteChunk(
            InfoType::InlineInfo,
            "InlineInfo length is greater than UINT32_MAX",
            [&] { return Inline->encode(Out, Range.start()); }))
      return std::move(E);
  }

  // Merged functions info.
  if (MergedFunctions) {
    if (llvm::Error E = WriteChunk(
            InfoType::MergedFunctionsInfo,
            "MergedFunctionsInfo length is greater than UINT32_MAX",
            [&] { return MergedFunctions->encode(Out); }))
      return std::move(E);
  }

  // Call site info.
  if (CallSites) {
    if (llvm::Error E = WriteChunk(
            InfoType::CallSiteInfo,
            "CallSites length is greater than UINT32_MAX",
            [&] { return CallSites->encode(Out); }))
      return std::move(E);
  }

  // Terminate the data chunks with an end of list with zero size.
  Out.writeU32(InfoType::EndOfList);
  Out.writeU32(0);
  return FuncInfoOffset;
}
/// Look up \a Addr in the encoded FunctionInfo held in \a Data without
/// decoding the whole object.
///
/// Only the parts needed for the lookup are parsed: the function size and
/// name, the line table entry for \a Addr, and the inline info if a line
/// entry was found. Chunks of any other type are skipped.
///
/// \param Data            The encoded FunctionInfo bytes, starting at
///                        offset 0.
/// \param GR              Reader used to resolve string table offsets and
///                        file indexes.
/// \param FuncAddr        Start address of the function described by
///                        \a Data.
/// \param Addr            The address being looked up.
/// \param MergedFuncsData If non-null and a MergedFunctionsInfo chunk is
///                        present, receives a DataExtractor over that
///                        chunk's payload so the caller can parse it later.
/// \returns A LookupResult holding the function range, the function name and
///          at least one source location, or an error if the data is
///          truncated, \a Addr is outside a non-empty function range, the
///          name offset is zero, the line entry's file cannot be extracted,
///          or the line table / inline info lookup fails.
llvm::Expected<LookupResult>
FunctionInfo::lookup(DataExtractor &Data, const GsymReader &GR,
                     uint64_t FuncAddr, uint64_t Addr,
                     std::optional<DataExtractor> *MergedFuncsData) {
  LookupResult LR;
  LR.LookupAddr = Addr;
  uint64_t Offset = 0;
  // The fixed header is two uint32_t values: the function size and the name.
  // DataExtractor::getU32 returns zero and leaves the offset untouched when
  // it cannot read, so check for the full header up front. Without this a
  // short buffer would be reported as a bad name or a bad address (with an
  // underflowed offset in the message) instead of as truncated data.
  if (!Data.isValidOffsetForDataOfSize(Offset, 8))
    return createStringError(std::errc::io_error,
                             "FunctionInfo data is truncated");
  LR.FuncRange = {FuncAddr, FuncAddr + Data.getU32(&Offset)};
  uint32_t NameOffset = Data.getU32(&Offset);
  // The "lookup" function doesn't report errors as accurately as the "decode"
  // function as it is meant to be fast. For more accurate errors we could call
  // "decode".
  // There must be at least one more byte after the header, since the chunk
  // list always ends with an EndOfList entry.
  if (!Data.isValidOffset(Offset))
    return createStringError(std::errc::io_error,
                             "FunctionInfo data is truncated");
  // This function will be called with the result of a binary search of the
  // address table, we must still make sure the address does not fall into a
  // gap between functions or after the last function.
  if (LR.FuncRange.size() > 0 && !LR.FuncRange.contains(Addr))
    return createStringError(std::errc::io_error,
                             "address 0x%" PRIx64 " is not in GSYM", Addr);
  if (NameOffset == 0)
    return createStringError(std::errc::io_error,
        "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x00000000",
        Offset - 4);
  LR.FuncName = GR.getString(NameOffset);
  bool Done = false;
  std::optional<LineEntry> LineEntry;
  std::optional<DataExtractor> InlineInfoData;
  while (!Done) {
    // Each chunk starts with a uint32_t InfoType and a uint32_t length.
    if (!Data.isValidOffsetForDataOfSize(Offset, 8))
      return createStringError(std::errc::io_error,
                               "FunctionInfo data is truncated");
    const uint32_t IT = Data.getU32(&Offset);
    const uint32_t InfoLength = Data.getU32(&Offset);
    // substr() clamps to the available data, so a shorter result means the
    // chunk payload runs past the end of the buffer.
    const StringRef InfoBytes = Data.getData().substr(Offset, InfoLength);
    if (InfoLength != InfoBytes.size())
      return createStringError(std::errc::io_error,
                               "FunctionInfo data is truncated");
    DataExtractor InfoData(InfoBytes, Data.isLittleEndian(),
                           Data.getAddressSize());
    switch (IT) {
    case InfoType::EndOfList:
      Done = true;
      break;

    case InfoType::LineTableInfo:
      if (auto ExpectedLE = LineTable::lookup(InfoData, FuncAddr, Addr))
        LineEntry = ExpectedLE.get();
      else
        return ExpectedLE.takeError();
      break;

    case InfoType::MergedFunctionsInfo:
      // Store the merged functions data for later parsing, if needed.
      if (MergedFuncsData)
        *MergedFuncsData = InfoData;
      break;

    case InfoType::InlineInfo:
      // We will parse the inline info after our line table, but only if
      // we have a line entry.
      InlineInfoData = InfoData;
      break;

    default:
      // Chunk types that are not needed for a lookup are skipped.
      break;
    }
    Offset += InfoLength;
  }

  if (!LineEntry) {
    // We don't have a valid line entry for our address, fill in our source
    // location as best we can and return.
    SourceLocation SrcLoc;
    SrcLoc.Name = LR.FuncName;
    SrcLoc.Offset = Addr - FuncAddr;
    LR.Locations.push_back(SrcLoc);
    return LR;
  }

  std::optional<FileEntry> LineEntryFile = GR.getFile(LineEntry->File);
  if (!LineEntryFile)
    return createStringError(std::errc::invalid_argument,
                             "failed to extract file[%" PRIu32 "]",
                             LineEntry->File);

  SourceLocation SrcLoc;
  SrcLoc.Name = LR.FuncName;
  SrcLoc.Offset = Addr - FuncAddr;
  SrcLoc.Dir = GR.getString(LineEntryFile->Dir);
  SrcLoc.Base = GR.getString(LineEntryFile->Base);
  SrcLoc.Line = LineEntry->Line;
  LR.Locations.push_back(SrcLoc);
  // If we don't have inline information, we are done.
  if (!InlineInfoData)
    return LR;
  // We have inline information. Try to augment the lookup result with this
  // data.
  llvm::Error Err = InlineInfo::lookup(GR, *InlineInfoData, FuncAddr, Addr,
                                       LR.Locations);
  if (Err)
    return std::move(Err);
  return LR;
}