Reland '[TextAPI] Add DylibReader' (#75862)

> Add support for reading binary Mach-O dynamic libraries. It uses
libObject APIs to extract information relevant to TAPI and tbd
files. This includes, but is not limited to, data encoded in load
commands (such as install names and current/compat versions) and symbols.

The original commit broke because DylibReader uses Object, and Object depends
on TextAPI, creating a dependency cycle. Moving DylibReader into a nested
library avoids this cycle.
This commit is contained in:
Cyndy Ishida
2023-12-18 16:55:30 -08:00
committed by GitHub
parent 06b2da4155
commit e3627e2690
8 changed files with 473 additions and 1 deletions

View File

@@ -0,0 +1,43 @@
//===- TextAPI/DylibReader.h - TAPI MachO Dylib Reader ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// Defines the MachO Dynamic Library Reader.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_TEXTAPI_DYLIBREADER_H
#define LLVM_TEXTAPI_DYLIBREADER_H
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/TextAPI/ArchitectureSet.h"
#include "llvm/TextAPI/RecordsSlice.h"
namespace llvm::MachO::DylibReader {
/// Selects which parts of a Mach-O binary the reader extracts.
struct ParseOption {
  /// Architecture slices to parse; defaults to every architecture.
  ArchitectureSet Archs = ArchitectureSet::All();
  /// Whether to capture the Mach-O header, primarily load commands.
  bool MachOHeader{true};
  /// Whether to capture defined symbols from the export trie and n-list.
  bool SymbolTable{true};
  /// Whether undefined symbols are captured as well.
  bool Undefineds{true};
};
/// Parse Mach-O dynamic libraries to extract TAPI attributes.
///
/// \param Buffer Data that points to dylib.
/// \param Opt Determines which attributes to extract.
/// \return List of record slices, or an error if parsing failed.
Expected<Records> readFile(MemoryBufferRef Buffer, const ParseOption &Opt);
} // namespace llvm::MachO::DylibReader
#endif // LLVM_TEXTAPI_DYLIBREADER_H

View File

@@ -103,6 +103,10 @@ public:
/// Returns true when this record's kind is Kind::Function.
bool isFunction() const { return GV == Kind::Function; }
/// Returns true when this record's kind is Kind::Variable.
bool isVariable() const { return GV == Kind::Variable; }
/// Assign the record's kind, but only while it is still Kind::Unknown; a kind
/// that has already been determined is left untouched.
void setKind(const Kind &V) {
  if (GV != Kind::Unknown)
    return;
  GV = V;
}
private:
Kind GV;

View File

@@ -181,6 +181,8 @@ private:
std::unique_ptr<BinaryAttrs> BA{nullptr};
};
/// List of shared RecordsSlice handles, with inline storage for four entries.
using Records = llvm::SmallVector<std::shared_ptr<RecordsSlice>, 4>;
} // namespace MachO
} // namespace llvm
#endif // LLVM_TEXTAPI_RECORDSLICE_H

View File

@@ -21,7 +21,8 @@ enum class TextAPIErrorCode {
NoSuchArchitecture,
EmptyResults,
GenericFrontendError,
InvalidInputFormat
InvalidInputFormat,
UnsupportedTarget
};
class TextAPIError : public llvm::ErrorInfo<TextAPIError> {

View File

@@ -0,0 +1,9 @@
# Mach-O binary reader for TextAPI. It is its own component because it links
# against Object, and Object depends on TextAPI.
add_llvm_component_library(LLVMTextAPIBinaryReader
DylibReader.cpp
LINK_COMPONENTS
Support
Object
TextAPI
TargetParser
)

View File

@@ -0,0 +1,410 @@
//===- DylibReader.cpp -------------- TAPI MachO Dylib Reader --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// Implements the TAPI Reader for Mach-O dynamic libraries.
///
//===----------------------------------------------------------------------===//
#include "llvm/TextAPI/DylibReader.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Support/Endian.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/TextAPI/RecordsSlice.h"
#include "llvm/TextAPI/TextAPIError.h"
#include <iomanip>
#include <set>
#include <sstream>
#include <string>
using namespace llvm;
using namespace llvm::object;
using namespace llvm::MachO;
using namespace llvm::MachO::DylibReader;
/// Orders triples by their full string form so they can be stored in a
/// std::set. Kept `static` so the comparator stays private to this file.
static auto TripleCmp = [](const Triple &LHS, const Triple &RHS) {
  return LHS.getTriple() < RHS.getTriple();
};
/// Set of triples ordered by TripleCmp.
using TripleSet = std::set<Triple, decltype(TripleCmp)>;
/// Derive the target triples a Mach-O slice was built for.
///
/// Walks the load commands of \p Obj and records one triple for every
/// version-min or build-version command whose platform is recognized. The
/// architecture component is the name of \p ArchT and the vendor is "apple".
///
/// \returns The collected triples; if no load command produced one, a single
/// triple whose OS component is "unknown".
static TripleSet constructTriples(MachOObjectFile *Obj,
                                  const Architecture ArchT) {
  // Render a packed 32-bit version number as text.
  auto formatVersion = [](uint32_t Packed) {
    std::string Text;
    raw_string_ostream Out(Text);
    Out << PackedVersion(Packed);
    return Out.str();
  };

  // FIXME: Can remove TripleCmp arg when building in c++20.
  TripleSet Found(TripleCmp);
  const bool OnIntel = ArchitectureSet(ArchT).hasX86();
  const auto ArchName = getArchitectureName(ArchT);

  // Insert a triple for the given "<os><version>" string. A null Env selects
  // the three-component form without an environment.
  auto record = [&](const std::string &OSAndVersion, const char *Env) {
    if (Env)
      Found.emplace(ArchName, "apple", OSAndVersion, Env);
    else
      Found.emplace(ArchName, "apple", OSAndVersion);
  };

  // Handle an LC_VERSION_MIN_* command. When SimulatorOnIntel is set, an x86
  // slice is recorded with the "simulator" environment.
  auto recordVersionMin = [&](const MachOObjectFile::LoadCommandInfo &LC,
                              const char *OS, bool SimulatorOnIntel) {
    const uint32_t Packed = Obj->getVersionMinLoadCommand(LC).version;
    record(OS + formatVersion(Packed),
           (SimulatorOnIntel && OnIntel) ? "simulator" : nullptr);
  };

  // Handle an LC_BUILD_VERSION command by mapping its platform to an OS name
  // and optional environment.
  auto recordBuildVersion = [&](const MachOObjectFile::LoadCommandInfo &LC) {
    const auto BV = Obj->getBuildVersionLoadCommand(LC);
    const char *OS = nullptr;
    const char *Env = nullptr;
    switch (BV.platform) {
    case MachO::PLATFORM_MACOS:
      OS = "macos";
      break;
    case MachO::PLATFORM_IOS:
      OS = "ios";
      break;
    case MachO::PLATFORM_TVOS:
      OS = "tvos";
      break;
    case MachO::PLATFORM_WATCHOS:
      OS = "watchos";
      break;
    case MachO::PLATFORM_BRIDGEOS:
      OS = "bridgeos";
      break;
    case MachO::PLATFORM_MACCATALYST:
      OS = "ios";
      Env = "macabi";
      break;
    case MachO::PLATFORM_IOSSIMULATOR:
      OS = "ios";
      Env = "simulator";
      break;
    case MachO::PLATFORM_TVOSSIMULATOR:
      OS = "tvos";
      Env = "simulator";
      break;
    case MachO::PLATFORM_WATCHOSSIMULATOR:
      OS = "watchos";
      Env = "simulator";
      break;
    case MachO::PLATFORM_DRIVERKIT:
      OS = "driverkit";
      break;
    default:
      return; // Skip any others.
    }
    record(OS + formatVersion(BV.minos), Env);
  };

  for (const auto &LC : Obj->load_commands()) {
    switch (LC.C.cmd) {
    case MachO::LC_VERSION_MIN_MACOSX:
      recordVersionMin(LC, "macos", /*SimulatorOnIntel=*/false);
      break;
    case MachO::LC_VERSION_MIN_IPHONEOS:
      recordVersionMin(LC, "ios", /*SimulatorOnIntel=*/true);
      break;
    case MachO::LC_VERSION_MIN_TVOS:
      recordVersionMin(LC, "tvos", /*SimulatorOnIntel=*/true);
      break;
    case MachO::LC_VERSION_MIN_WATCHOS:
      recordVersionMin(LC, "watchos", /*SimulatorOnIntel=*/true);
      break;
    case MachO::LC_BUILD_VERSION:
      recordBuildVersion(LC);
      break;
    default:
      break;
    }
  }

  // Record unknown platform for older binaries that don't enforce platform
  // load commands.
  if (Found.empty())
    Found.emplace(ArchName, "apple", "unknown");
  return Found;
}
/// Fill the binary attributes of \p Slice from the Mach-O header, the load
/// commands and the Objective-C image-info section of \p Obj.
///
/// \param Obj Mach-O object to read.
/// \param Slice Slice whose BinaryAttrs are populated; strings are copied
/// through Slice.copyString.
/// \returns An InvalidInputFormat TextAPIError when the file type is not a
/// dylib, dylib stub or bundle; any error hit while reading section names or
/// contents; success otherwise.
static Error readMachOHeader(MachOObjectFile *Obj, RecordsSlice &Slice) {
  auto H = Obj->getHeader();
  auto &BA = Slice.getBinaryAttrs();

  switch (H.filetype) {
  default:
    // The file type comes straight from the input binary, so an unexpected
    // value is reported as an error rather than treated as unreachable.
    return make_error<TextAPIError>(TextAPIErrorCode::InvalidInputFormat);
  case MachO::MH_DYLIB:
    BA.File = FileType::MachO_DynamicLibrary;
    break;
  case MachO::MH_DYLIB_STUB:
    BA.File = FileType::MachO_DynamicLibrary_Stub;
    break;
  case MachO::MH_BUNDLE:
    BA.File = FileType::MachO_Bundle;
    break;
  }

  if (H.flags & MachO::MH_TWOLEVEL)
    BA.TwoLevelNamespace = true;
  if (H.flags & MachO::MH_APP_EXTENSION_SAFE)
    BA.AppExtensionSafe = true;

  for (const auto &LCI : Obj->load_commands()) {
    switch (LCI.C.cmd) {
    case MachO::LC_ID_DYLIB: {
      auto DLLC = Obj->getDylibIDLoadCommand(LCI);
      // dylib.name is an offset from the start of the load command.
      BA.InstallName = Slice.copyString(LCI.Ptr + DLLC.dylib.name);
      BA.CurrentVersion = DLLC.dylib.current_version;
      BA.CompatVersion = DLLC.dylib.compatibility_version;
      break;
    }
    case MachO::LC_REEXPORT_DYLIB: {
      auto DLLC = Obj->getDylibIDLoadCommand(LCI);
      BA.RexportedLibraries.emplace_back(
          Slice.copyString(LCI.Ptr + DLLC.dylib.name));
      break;
    }
    case MachO::LC_SUB_FRAMEWORK: {
      auto SFC = Obj->getSubFrameworkCommand(LCI);
      BA.ParentUmbrella = Slice.copyString(LCI.Ptr + SFC.umbrella);
      break;
    }
    case MachO::LC_SUB_CLIENT: {
      auto SCLC = Obj->getSubClientCommand(LCI);
      BA.AllowableClients.emplace_back(Slice.copyString(LCI.Ptr + SCLC.client));
      break;
    }
    case MachO::LC_UUID: {
      // Format the 16 UUID bytes as uppercase hex in 8-4-4-4-12 grouping.
      auto UUIDLC = Obj->getUuidCommand(LCI);
      std::stringstream Stream;
      for (unsigned I = 0; I < 16; ++I) {
        if (I == 4 || I == 6 || I == 8 || I == 10)
          Stream << '-';
        Stream << std::setfill('0') << std::setw(2) << std::uppercase
               << std::hex << static_cast<int>(UUIDLC.uuid[I]);
      }
      BA.UUID = Slice.copyString(Stream.str());
      break;
    }
    case MachO::LC_RPATH: {
      auto RPLC = Obj->getRpathCommand(LCI);
      BA.RPaths.emplace_back(Slice.copyString(LCI.Ptr + RPLC.path));
      break;
    }
    case MachO::LC_SEGMENT_SPLIT_INFO: {
      // An empty split-info payload marks the library as not for the shared
      // cache.
      auto SSILC = Obj->getLinkeditDataLoadCommand(LCI);
      if (SSILC.datasize == 0)
        BA.OSLibNotForSharedCache = true;
      break;
    }
    default:
      break;
    }
  }

  for (auto &Sect : Obj->sections()) {
    auto SectName = Sect.getName();
    if (!SectName)
      return SectName.takeError();
    if (*SectName != "__objc_imageinfo" && *SectName != "__image_info")
      continue;
    auto Content = Sect.getContents();
    if (!Content)
      return Content.takeError();
    // Needs at least two 32-bit words and a zero first byte; the Swift ABI
    // version is bits 8..15 of the second word, read in the object's byte
    // order.
    if ((Content->size() >= 8) && (Content->front() == 0)) {
      const char *FlagsPtr = Content->data() + 4;
      const uint32_t Flags = Obj->isLittleEndian()
                                 ? support::endian::read32le(FlagsPtr)
                                 : support::endian::read32be(FlagsPtr);
      BA.SwiftABI = (Flags >> 8) & 0xFF;
    }
  }
  return Error::success();
}
/// Record the symbols of \p Obj into \p Slice.
///
/// Symbols are first collected from the export trie and then from the symbol
/// table; for exported symbols found in both, the trie's flags and linkage
/// are used.
///
/// \param Obj Mach-O object to read.
/// \param Slice Destination for the symbol records.
/// \param Opt Only Opt.Undefineds is consulted: undefined symbols are recorded
/// when it is set and skipped otherwise.
/// \returns The error produced while walking the export trie or while
/// querying a symbol's flags, name or type; success otherwise.
static Error readSymbols(MachOObjectFile *Obj, RecordsSlice &Slice,
                         const ParseOption &Opt) {
  // Translate export-trie flags into TAPI symbol flags and linkage.
  auto parseExport =
      [](const auto ExportFlags) -> std::tuple<SymbolFlags, RecordLinkage> {
    SymbolFlags Flags = SymbolFlags::None;
    switch (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) {
    case MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR:
      if (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION)
        Flags |= SymbolFlags::WeakDefined;
      break;
    case MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL:
      Flags |= SymbolFlags::ThreadLocalValue;
      break;
    }

    RecordLinkage Linkage = (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT)
                                ? RecordLinkage::Rexported
                                : RecordLinkage::Exported;
    return {Flags, Linkage};
  };

  Error Err = Error::success();

  StringMap<std::pair<SymbolFlags, RecordLinkage>> Exports;
  // Collect symbols from export trie first. Sometimes, there are more exports
  // in the trie than in n-list due to stripping. This is common for swift
  // mangled symbols.
  for (auto &Sym : Obj->exports(Err)) {
    auto [Flags, Linkage] = parseExport(Sym.flags());
    Slice.addRecord(Sym.name(), Flags, GlobalRecord::Kind::Unknown, Linkage);
    Exports[Sym.name()] = {Flags, Linkage};
  }
  // Report a malformed trie right away. This also leaves Err checked, so the
  // early returns below never destroy an unchecked Error.
  if (Err)
    return Err;

  for (const auto &Sym : Obj->symbols()) {
    auto FlagsOrErr = Sym.getFlags();
    if (!FlagsOrErr)
      return FlagsOrErr.takeError();
    auto Flags = *FlagsOrErr;

    auto NameOrErr = Sym.getName();
    if (!NameOrErr)
      return NameOrErr.takeError();
    auto Name = *NameOrErr;

    RecordLinkage Linkage = RecordLinkage::Unknown;
    SymbolFlags RecordFlags = SymbolFlags::None;

    if (Flags & SymbolRef::SF_Undefined) {
      // Undefined symbols must be dropped here when not requested. If they
      // fell through, one that also carries SF_Exported would be recorded as
      // an export.
      if (!Opt.Undefineds)
        continue;
      Linkage = RecordLinkage::Undefined;
      if (Flags & SymbolRef::SF_Weak)
        RecordFlags |= SymbolFlags::WeakReferenced;
    } else if (Flags & SymbolRef::SF_Exported) {
      auto Exp = Exports.find(Name);
      // This should never be possible when binaries are produced with Apple
      // linkers. However it is possible to craft dylibs where the export trie
      // is either malformed or has conflicting symbols compared to n_list.
      if (Exp != Exports.end())
        std::tie(RecordFlags, Linkage) = Exp->second;
      else
        Linkage = RecordLinkage::Exported;
    } else if (Flags & SymbolRef::SF_Hidden) {
      Linkage = RecordLinkage::Internal;
    } else
      continue;

    auto TypeOrErr = Sym.getType();
    if (!TypeOrErr)
      return TypeOrErr.takeError();
    auto Type = *TypeOrErr;

    // Anything that is not a function is recorded as a variable.
    GlobalRecord::Kind GV = (Type & SymbolRef::ST_Function)
                                ? GlobalRecord::Kind::Function
                                : GlobalRecord::Kind::Variable;

    if (GV == GlobalRecord::Kind::Function)
      RecordFlags |= SymbolFlags::Text;
    else
      RecordFlags |= SymbolFlags::Data;

    Slice.addRecord(Name, RecordFlags, GV, Linkage);
  }
  return Error::success();
}
/// Populate \p Slice from \p Obj according to \p Opt.
///
/// \returns An UnsupportedTarget TextAPIError when \p Arch is AK_unknown, the
/// first error reported by the header or symbol readers, or success.
static Error load(MachOObjectFile *Obj, RecordsSlice &Slice,
                  const ParseOption &Opt, const Architecture Arch) {
  if (Arch == AK_unknown)
    return make_error<TextAPIError>(TextAPIErrorCode::UnsupportedTarget);

  // Header attributes are read before symbols; each step is optional.
  if (Opt.MachOHeader) {
    if (Error HeaderErr = readMachOHeader(Obj, Slice))
      return HeaderErr;
  }

  if (Opt.SymbolTable) {
    if (Error SymbolErr = readSymbols(Obj, Slice, Opt))
      return SymbolErr;
  }

  return Error::success();
}
/// Parse a Mach-O dynamic library, thin or universal, into record slices.
///
/// A thin object yields one slice per triple derived from its load commands.
/// A universal binary yields slices for every requested, known architecture
/// whose member is a dylib, dylib stub or bundle; other members are skipped.
/// Every slice records the buffer identifier as its path.
///
/// \param Buffer Data that points to the binary.
/// \param Opt Determines which architectures and attributes to extract.
/// \returns The record slices, or an error when the buffer cannot be parsed,
/// is not a Mach-O object or universal binary, the requested architecture is
/// absent from a thin object, a thin object's target is unsupported, or
/// nothing was produced from a universal binary.
Expected<Records> DylibReader::readFile(MemoryBufferRef Buffer,
                                        const ParseOption &Opt) {
  Records Results;
  auto BinOrErr = createBinary(Buffer);
  if (!BinOrErr)
    return BinOrErr.takeError();

  Binary &Bin = *BinOrErr.get();
  if (auto *Obj = dyn_cast<MachOObjectFile>(&Bin)) {
    const auto Arch = getArchitectureFromCpuType(Obj->getHeader().cputype,
                                                 Obj->getHeader().cpusubtype);
    if (!Opt.Archs.has(Arch))
      return make_error<TextAPIError>(TextAPIErrorCode::NoSuchArchitecture);

    auto Triples = constructTriples(Obj, Arch);
    for (const auto &T : Triples) {
      if (mapToPlatformType(T) == PLATFORM_UNKNOWN)
        return make_error<TextAPIError>(TextAPIErrorCode::UnsupportedTarget);
      Results.emplace_back(std::make_shared<RecordsSlice>(RecordsSlice({T})));
      if (auto Err = load(Obj, *Results.back(), Opt, Arch))
        return std::move(Err);
      Results.back()->getBinaryAttrs().Path = Buffer.getBufferIdentifier();
    }
    return Results;
  }

  // Only MachO universal binaries are expected at this point. The buffer is
  // caller-supplied, so any other binary kind is reported as an error rather
  // than asserted on.
  auto *UB = dyn_cast<MachOUniversalBinary>(&Bin);
  if (!UB)
    return make_error<TextAPIError>(TextAPIErrorCode::InvalidInputFormat);

  for (auto OI = UB->begin_objects(), OE = UB->end_objects(); OI != OE; ++OI) {
    // Skip architecture if not requested.
    auto Arch =
        getArchitectureFromCpuType(OI->getCPUType(), OI->getCPUSubType());
    if (!Opt.Archs.has(Arch))
      continue;

    // Skip unknown architectures.
    if (Arch == AK_unknown)
      continue;

    // This can fail if the object is an archive.
    auto ObjOrErr = OI->getAsObjectFile();

    // Skip the archive and consume the error.
    if (!ObjOrErr) {
      consumeError(ObjOrErr.takeError());
      continue;
    }

    auto &Obj = *ObjOrErr.get();
    switch (Obj.getHeader().filetype) {
    default:
      break;
    case MachO::MH_BUNDLE:
    case MachO::MH_DYLIB:
    case MachO::MH_DYLIB_STUB:
      for (const auto &T : constructTriples(&Obj, Arch)) {
        Results.emplace_back(std::make_shared<RecordsSlice>(RecordsSlice({T})));
        if (auto Err = load(&Obj, *Results.back(), Opt, Arch))
          return std::move(Err);
        // Record the source path just as the thin-object path does.
        Results.back()->getBinaryAttrs().Path = Buffer.getBufferIdentifier();
      }
      break;
    }
  }

  if (Results.empty())
    return make_error<TextAPIError>(TextAPIErrorCode::EmptyResults);
  return Results;
}

View File

@@ -21,3 +21,5 @@ add_llvm_component_library(LLVMTextAPI
BinaryFormat
TargetParser
)
# Build the nested BinaryReader library.
add_subdirectory(BinaryReader)

View File

@@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS
Support
Option
TextAPI
TextAPIBinaryReader
)
set(LLVM_TARGET_DEFINITIONS TapiOpts.td)