Files
clang-p2996/lld/COFF/SymbolTable.cpp
Jacek Caban 98bc5295ec [LLD][COFF] Check both mangled and demangled symbols before adding a lazy archive symbol to the symbol table on ARM64EC (#113284)
On ARM64EC, a function symbol may appear in both mangled and demangled
forms:
- ARM64EC archives contain only the mangled name, while the demangled
symbol is defined by the object file as an alias.
- x86_64 archives contain only the demangled name (the mangled name is
usually defined by an object referencing the symbol as an alias to a
guest exit thunk).
- ARM64EC import files contain both the mangled and demangled names for
thunks.

If more than one archive defines the same function, this could lead to
different libraries being used for the same function depending on how
they are referenced. Avoid this by checking if the paired symbol is
already defined before adding a symbol to the table.
2024-10-23 13:10:07 +02:00

1004 lines
33 KiB
C++

//===- SymbolTable.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "SymbolTable.h"
#include "COFFLinkerContext.h"
#include "Config.h"
#include "Driver.h"
#include "LTO.h"
#include "PDB.h"
#include "Symbols.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "lld/Common/Timer.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Mangler.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"
#include <utility>
using namespace llvm;
namespace lld::coff {
// Strips at most one leading character from `s`: the first character is
// dropped when it occurs in the NUL-terminated set `chars`, otherwise `s` is
// returned untouched.
StringRef ltrim1(StringRef s, const char *chars) {
  const bool dropFirst = !s.empty() && strchr(chars, s[0]) != nullptr;
  return dropFirst ? s.substr(1) : s;
}
// Tells whether an input of machine type `mt` may take part in a link whose
// target is ctx.config.machine. Inputs of unknown machine type are always
// accepted.
static bool compatibleMachineType(COFFLinkerContext &ctx, MachineTypes mt) {
  if (mt == IMAGE_FILE_MACHINE_UNKNOWN)
    return true;

  const auto target = ctx.config.machine;
  if (target == ARM64)
    return mt == ARM64 || mt == ARM64X;
  if (target == ARM64EC)
    return COFF::isArm64EC(mt) || mt == AMD64;
  if (target == ARM64X)
    return COFF::isAnyArm64(mt) || mt == AMD64;
  // Every other target only accepts inputs of exactly its own type.
  return target == mt;
}
// Parses `file` (lazily when it is flagged lazy), records fully parsed object,
// bitcode and import files in the linker context, validates the file's machine
// type against the link target and finally hands the file to the driver for
// directive parsing.
void SymbolTable::addFile(InputFile *file) {
  log("Reading " + toString(file));

  if (!file->lazy) {
    file->parse();
    if (auto *obj = dyn_cast<ObjFile>(file)) {
      ctx.objFileInstances.push_back(obj);
    } else if (auto *bitcode = dyn_cast<BitcodeFile>(file)) {
      if (ltoCompilationDone) {
        error("LTO object file " + toString(file) + " linked in after "
              "doing LTO compilation.");
      }
      ctx.bitcodeFileInstances.push_back(bitcode);
    } else if (auto *impFile = dyn_cast<ImportFile>(file)) {
      ctx.importFileInstances.push_back(impFile);
    }
  } else if (auto *bitcode = dyn_cast<BitcodeFile>(file)) {
    bitcode->parseLazy();
  } else {
    cast<ObjFile>(file)->parseLazy();
  }

  const MachineTypes mt = file->getMachineType();
  if (ctx.config.machine == IMAGE_FILE_MACHINE_UNKNOWN) {
    // No target machine has been chosen yet: adopt this file's machine type.
    ctx.config.machine = mt;
    ctx.driver.addWinSysRootLibSearchPaths();
  } else if (!compatibleMachineType(ctx, mt)) {
    error(toString(file) + ": machine type " + machineToStr(mt) +
          " conflicts with " + machineToStr(ctx.config.machine));
    return;
  }

  ctx.driver.parseDirectives(file);
}
// Emits `s` as a warning when `forceUnresolved` is set and as an error
// otherwise.
static void errorOrWarn(const Twine &s, bool forceUnresolved) {
  if (!forceUnresolved) {
    error(s);
    return;
  }
  warn(s);
}
// Pulls in the file that backs a lazy symbol. The symbol is flagged as having
// a pending archive load before the load is triggered. `s` must be a
// LazyArchive, LazyObject or LazyDLLSymbol.
static void forceLazy(Symbol *s) {
  s->pendingArchiveLoad = true;

  if (auto *archiveSym = dyn_cast<LazyArchive>(s)) {
    archiveSym->file->addMember(archiveSym->sym);
    return;
  }
  if (auto *objectSym = dyn_cast<LazyObject>(s)) {
    InputFile *file = objectSym->file;
    file->ctx.symtab.addFile(file);
    return;
  }
  if (auto *dllSym = dyn_cast<LazyDLLSymbol>(s)) {
    dllSym->file->makeImport(dllSym->sym);
    return;
  }
  llvm_unreachable(
      "symbol passed to forceLazy is not a LazyArchive or LazyObject");
}
// Finds the regular symbol of `sc` that most closely precedes (or sits at)
// `addr`, i.e. the one with the greatest value that is still <= addr. This is
// normally the function or global variable whose definition covers `addr`.
// Returns nullptr when the chunk has no such symbol.
static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
  DefinedRegular *best = nullptr;
  for (Symbol *sym : sc->file->getSymbols()) {
    auto *def = dyn_cast_or_null<DefinedRegular>(sym);
    // Only symbols that live in this very chunk are of interest.
    if (!def || !def->data || def->file != sc->file || def->getChunk() != sc)
      continue;
    const auto value = def->getValue();
    if (value > addr)
      continue;
    if (best && value < best->getValue())
      continue;
    best = def;
  }
  return best;
}
// Builds the single "referenced by" description for a bitcode file: the
// module's source file name (when it has one) followed by the file itself.
static std::vector<std::string> getSymbolLocations(BitcodeFile *file) {
  std::string location = "\n>>> referenced by ";
  if (StringRef source = file->obj->getSourceFileName(); !source.empty()) {
    location += source.str();
    location += "\n>>> ";
  }
  location += toString(file);

  std::vector<std::string> result;
  result.push_back(std::move(location));
  return result;
}
static std::optional<std::pair<StringRef, uint32_t>>
getFileLineDwarf(const SectionChunk *c, uint32_t addr) {
std::optional<DILineInfo> optionalLineInfo =
c->file->getDILineInfo(addr, c->getSectionNumber() - 1);
if (!optionalLineInfo)
return std::nullopt;
const DILineInfo &lineInfo = *optionalLineInfo;
if (lineInfo.FileName == DILineInfo::BadString)
return std::nullopt;
return std::make_pair(saver().save(lineInfo.FileName), lineInfo.Line);
}
static std::optional<std::pair<StringRef, uint32_t>>
getFileLine(const SectionChunk *c, uint32_t addr) {
// MinGW can optionally use codeview, even if the default is dwarf.
std::optional<std::pair<StringRef, uint32_t>> fileLine =
getFileLineCodeView(c, addr);
// If codeview didn't yield any result, check dwarf in MinGW mode.
if (!fileLine && c->file->ctx.config.mingw)
fileLine = getFileLineDwarf(c, addr);
return fileLine;
}
// Describes every reference that `file` makes to the symbol with symbol table
// index `symIndex`. With debug information there is one string per reference;
// without any relocation against the symbol the result is just the file name.
// At most `maxStrings` descriptions are produced; the second member of the
// returned pair is the total number of locations found.
static std::pair<std::vector<std::string>, size_t>
getSymbolLocations(ObjFile *file, uint32_t symIndex, size_t maxStrings) {
  struct Location {
    Symbol *sym;
    std::pair<StringRef, uint32_t> fileLine;
  };
  std::vector<Location> found;
  size_t total = 0;

  for (Chunk *chunk : file->getChunks()) {
    auto *sc = dyn_cast<SectionChunk>(chunk);
    if (!sc)
      continue;
    for (const coff_relocation &rel : sc->getRelocs()) {
      if (rel.SymbolTableIndex != symIndex)
        continue;
      ++total;
      // Keep counting once the cap is reached, but stop collecting details.
      if (found.size() >= maxStrings)
        continue;

      std::optional<std::pair<StringRef, uint32_t>> fileLine =
          getFileLine(sc, rel.VirtualAddress);
      Symbol *enclosing = getSymbol(sc, rel.VirtualAddress);
      if (fileLine)
        found.push_back({enclosing, *fileLine});
      else if (enclosing)
        found.push_back({enclosing, {"", 0}});
    }
  }

  if (maxStrings == 0)
    return {std::vector<std::string>(), total};
  if (total == 0)
    return {std::vector<std::string>{"\n>>> referenced by " + toString(file)},
            size_t{1}};

  std::vector<std::string> descriptions;
  descriptions.reserve(found.size());
  for (const Location &loc : found) {
    std::string text;
    {
      llvm::raw_string_ostream os(text);
      os << "\n>>> referenced by ";
      if (!loc.fileLine.first.empty())
        os << loc.fileLine.first << ":" << loc.fileLine.second
           << "\n>>> ";
      os << toString(file);
      if (loc.sym)
        os << ":(" << toString(file->ctx, *loc.sym) << ')';
    }
    descriptions.push_back(std::move(text));
  }
  return {std::move(descriptions), total};
}
// Convenience overload: all reference descriptions for `symIndex` in `file`,
// without any cap on their number.
std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
  auto uncapped = getSymbolLocations(file, symIndex, SIZE_MAX);
  return std::move(uncapped.first);
}
// Dispatches to the object-file or bitcode-file flavour of
// getSymbolLocations(). Returns at most `maxStrings` descriptions together
// with the total number of locations. Any other file kind is a programming
// error.
static std::pair<std::vector<std::string>, size_t>
getSymbolLocations(InputFile *file, uint32_t symIndex, size_t maxStrings) {
  if (auto *obj = dyn_cast<ObjFile>(file))
    return getSymbolLocations(obj, symIndex, maxStrings);

  auto *bitcode = dyn_cast<BitcodeFile>(file);
  if (!bitcode) {
    llvm_unreachable("unsupported file type passed to getSymbolLocations");
    return std::make_pair(std::vector<std::string>(), size_t{0});
  }

  std::vector<std::string> descriptions = getSymbolLocations(bitcode);
  const size_t total = descriptions.size();
  if (total > maxStrings)
    descriptions.resize(maxStrings);
  return {std::move(descriptions), total};
}
// For an undefined symbol, stores all files referencing it and the index of
// the undefined symbol in each file.
// This is a plain aggregate that is brace-initialized by its users, so the
// order of the members matters.
struct UndefinedDiag {
// The undefined symbol the diagnostic is about.
Symbol *sym;
// One reference site: a file plus the index of `sym` in that file's symbol
// list.
struct File {
InputFile *file;
uint32_t symIndex;
};
// Every file found to reference `sym`, in discovery order.
std::vector<File> files;
};
// Emits one "undefined symbol" diagnostic for `undefDiag`, listing up to
// three reference locations and summarising the remainder as a count. The
// message is an error, or a warning when ctx.config.forceUnresolved is set.
static void reportUndefinedSymbol(const COFFLinkerContext &ctx,
                                  const UndefinedDiag &undefDiag) {
  std::string out;
  llvm::raw_string_ostream os(out);
  os << "undefined symbol: " << toString(ctx, *undefDiag.sym);

  constexpr size_t maxUndefReferences = 3;
  size_t shown = 0;
  size_t total = 0;
  for (const UndefinedDiag::File &ref : undefDiag.files) {
    // Only ask for as many descriptions as there is room left for.
    std::pair<std::vector<std::string>, size_t> found =
        getSymbolLocations(ref.file, ref.symIndex, maxUndefReferences - shown);
    total += found.second;
    shown += found.first.size();
    for (const std::string &location : found.first)
      os << location;
  }
  if (shown < total)
    os << "\n>>> referenced " << total - shown << " more times";

  errorOrWarn(out, ctx.config.forceUnresolved);
}
void SymbolTable::loadMinGWSymbols() {
for (auto &i : symMap) {
Symbol *sym = i.second;
auto *undef = dyn_cast<Undefined>(sym);
if (!undef)
continue;
if (undef->getWeakAlias())
continue;
StringRef name = undef->getName();
if (ctx.config.machine == I386 && ctx.config.stdcallFixup) {
// Check if we can resolve an undefined decorated symbol by finding
// the intended target as an undecorated symbol (only with a leading
// underscore).
StringRef origName = name;
StringRef baseName = name;
// Trim down stdcall/fastcall/vectorcall symbols to the base name.
baseName = ltrim1(baseName, "_@");
baseName = baseName.substr(0, baseName.find('@'));
// Add a leading underscore, as it would be in cdecl form.
std::string newName = ("_" + baseName).str();
Symbol *l;
if (newName != origName && (l = find(newName)) != nullptr) {
// If we found a symbol and it is lazy; load it.
if (l->isLazy() && !l->pendingArchiveLoad) {
log("Loading lazy " + l->getName() + " from " +
l->getFile()->getName() + " for stdcall fixup");
forceLazy(l);
}
// If it's lazy or already defined, hook it up as weak alias.
if (l->isLazy() || isa<Defined>(l)) {
if (ctx.config.warnStdcallFixup)
warn("Resolving " + origName + " by linking to " + newName);
else
log("Resolving " + origName + " by linking to " + newName);
undef->setWeakAlias(l);
continue;
}
}
}
if (ctx.config.autoImport) {
if (name.starts_with("__imp_"))
continue;
// If we have an undefined symbol, but we have a lazy symbol we could
// load, load it.
Symbol *l = find(("__imp_" + name).str());
if (!l || l->pendingArchiveLoad || !l->isLazy())
continue;
log("Loading lazy " + l->getName() + " from " + l->getFile()->getName() +
" for automatic import");
forceLazy(l);
}
}
}
// Returns the defined "__imp_<name>" symbol for `name`, or nullptr when there
// is none or when `name` itself already carries the "__imp_" prefix.
Defined *SymbolTable::impSymbol(StringRef name) {
  if (name.starts_with("__imp_"))
    return nullptr;
  Symbol *candidate = find(("__imp_" + name).str());
  return dyn_cast_or_null<Defined>(candidate);
}
// Tries to resolve the undefined symbol `sym` (named `name`) through MinGW
// automatic import, i.e. by redirecting it to the defined "__imp_<name>"
// symbol.
//
// Returns true when the symbol was redirected and false when there is no
// usable "__imp_<name>" symbol (none defined, or of an unexpected kind, in
// which case a warning is emitted).
bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
  Defined *imp = impSymbol(name);
  if (!imp)
    return false;

  // Replace the reference directly to a variable with a reference
  // to the import address table instead. This obviously isn't right,
  // but we mark the symbol as isRuntimePseudoReloc, and a later pass
  // will add runtime pseudo relocations for every relocation against
  // this Symbol. The runtime pseudo relocation framework expects the
  // reference itself to point at the IAT entry.
  size_t impSize = 0;
  if (auto *importData = dyn_cast<DefinedImportData>(imp)) {
    log("Automatically importing " + name + " from " +
        importData->getDLLName());
    impSize = sizeof(DefinedImportData);
  } else if (auto *regular = dyn_cast<DefinedRegular>(imp)) {
    log("Automatically importing " + name + " from " +
        toString(regular->file));
    impSize = sizeof(DefinedRegular);
  } else {
    // `imp` is known NOT to be a DefinedRegular here, so it must not be cast
    // to one; ask the symbol itself for its file instead.
    warn("unable to automatically import " + name + " from " + imp->getName() +
         " from " + toString(imp->getFile()) + "; unexpected symbol type");
    return false;
  }
  sym->replaceKeepingName(imp, impSize);
  sym->isRuntimePseudoReloc = true;

  // There may exist symbols named .refptr.<name> which only consist
  // of a single pointer to <name>. If it turns out <name> is
  // automatically imported, we don't need to keep the .refptr.<name>
  // pointer at all, but redirect all accesses to it to the IAT entry
  // for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
  auto *refptr =
      dyn_cast_or_null<DefinedRegular>(find((".refptr." + name).str()));
  if (refptr) {
    // Null-check the chunk before looking at its size.
    SectionChunk *sc = dyn_cast_or_null<SectionChunk>(refptr->getChunk());
    if (sc && sc->getSize() == ctx.config.wordsize &&
        sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) {
      log("Replacing .refptr." + name + " with " + imp->getName());
      sc->live = false;
      refptr->replaceKeepingName(imp, impSize);
    }
  }
  return true;
}
/// Helper function for reportUnresolvable and resolveRemainingUndefines.
/// This function emits an "undefined symbol" diagnostic for each symbol in
/// undefs. If localImports is not nullptr, it also emits a "locally
/// defined symbol imported" diagnostic for symbols in localImports.
/// objFiles and bitcodeFiles (if not nullptr) are used to report where
/// undefined symbols are referenced.
static void reportProblemSymbols(
const COFFLinkerContext &ctx, const SmallPtrSetImpl<Symbol *> &undefs,
const DenseMap<Symbol *, Symbol *> *localImports, bool needBitcodeFiles) {
// Return early if there is nothing to report (which should be
// the common case).
if (undefs.empty() && (!localImports || localImports->empty()))
return;
for (Symbol *b : ctx.config.gcroot) {
if (undefs.count(b))
errorOrWarn("<root>: undefined symbol: " + toString(ctx, *b),
ctx.config.forceUnresolved);
if (localImports)
if (Symbol *imp = localImports->lookup(b))
warn("<root>: locally defined symbol imported: " + toString(ctx, *imp) +
" (defined in " + toString(imp->getFile()) + ") [LNK4217]");
}
std::vector<UndefinedDiag> undefDiags;
DenseMap<Symbol *, int> firstDiag;
auto processFile = [&](InputFile *file, ArrayRef<Symbol *> symbols) {
uint32_t symIndex = (uint32_t)-1;
for (Symbol *sym : symbols) {
++symIndex;
if (!sym)
continue;
if (undefs.count(sym)) {
auto [it, inserted] = firstDiag.try_emplace(sym, undefDiags.size());
if (inserted)
undefDiags.push_back({sym, {{file, symIndex}}});
else
undefDiags[it->second].files.push_back({file, symIndex});
}
if (localImports)
if (Symbol *imp = localImports->lookup(sym))
warn(toString(file) +
": locally defined symbol imported: " + toString(ctx, *imp) +
" (defined in " + toString(imp->getFile()) + ") [LNK4217]");
}
};
for (ObjFile *file : ctx.objFileInstances)
processFile(file, file->getSymbols());
if (needBitcodeFiles)
for (BitcodeFile *file : ctx.bitcodeFileInstances)
processFile(file, file->getSymbols());
for (const UndefinedDiag &undefDiag : undefDiags)
reportUndefinedSymbol(ctx, undefDiag);
}
void SymbolTable::reportUnresolvable() {
SmallPtrSet<Symbol *, 8> undefs;
for (auto &i : symMap) {
Symbol *sym = i.second;
auto *undef = dyn_cast<Undefined>(sym);
if (!undef || sym->deferUndefined)
continue;
if (undef->getWeakAlias())
continue;
StringRef name = undef->getName();
if (name.starts_with("__imp_")) {
Symbol *imp = find(name.substr(strlen("__imp_")));
if (Defined *def = dyn_cast_or_null<Defined>(imp)) {
def->isUsedInRegularObj = true;
continue;
}
}
if (name.contains("_PchSym_"))
continue;
if (ctx.config.autoImport && impSymbol(name))
continue;
undefs.insert(sym);
}
reportProblemSymbols(ctx, undefs,
/* localImports */ nullptr, true);
}
bool SymbolTable::resolveRemainingUndefines() {
llvm::TimeTraceScope timeScope("Resolve remaining undefined symbols");
SmallPtrSet<Symbol *, 8> undefs;
DenseMap<Symbol *, Symbol *> localImports;
bool foundLazy = false;
for (auto &i : symMap) {
Symbol *sym = i.second;
auto *undef = dyn_cast<Undefined>(sym);
if (!undef)
continue;
if (!sym->isUsedInRegularObj)
continue;
StringRef name = undef->getName();
// A weak alias may have been resolved, so check for that.
if (undef->resolveWeakAlias())
continue;
// If we can resolve a symbol by removing __imp_ prefix, do that.
// This odd rule is for compatibility with MSVC linker.
if (name.starts_with("__imp_")) {
Symbol *imp = find(name.substr(strlen("__imp_")));
if (imp) {
// The unprefixed symbol might come later in symMap, so handle it now
// so that the condition below can be appropriately applied.
auto *undef = dyn_cast<Undefined>(imp);
if (undef) {
undef->resolveWeakAlias();
}
}
if (imp && imp->isLazy()) {
forceLazy(imp);
foundLazy = true;
continue;
}
if (imp && isa<Defined>(imp)) {
auto *d = cast<Defined>(imp);
replaceSymbol<DefinedLocalImport>(sym, ctx, name, d);
localImportChunks.push_back(cast<DefinedLocalImport>(sym)->getChunk());
localImports[sym] = d;
continue;
}
}
// We don't want to report missing Microsoft precompiled headers symbols.
// A proper message will be emitted instead in PDBLinker::aquirePrecompObj
if (name.contains("_PchSym_"))
continue;
if (ctx.config.autoImport && handleMinGWAutomaticImport(sym, name))
continue;
// Remaining undefined symbols are not fatal if /force is specified.
// They are replaced with dummy defined symbols.
if (ctx.config.forceUnresolved)
replaceSymbol<DefinedAbsolute>(sym, ctx, name, 0);
undefs.insert(sym);
}
reportProblemSymbols(
ctx, undefs,
ctx.config.warnLocallyDefinedImported ? &localImports : nullptr, false);
return foundLazy;
}
// Looks up `name` in the symbol map, creating a fresh (not yet typed) symbol
// slot when there is none. The bool of the returned pair tells whether a new
// slot was created. On ARM64EC targets, new symbols whose name starts with
// "EXP+" are additionally recorded in expSymbols.
std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
  Symbol *&slot = symMap[CachedHashStringRef(name)];
  if (slot)
    return {slot, false};

  slot = reinterpret_cast<Symbol *>(make<SymbolUnion>());
  slot->isUsedInRegularObj = false;
  slot->pendingArchiveLoad = false;
  slot->canInline = true;
  if (isArm64EC(ctx.config.machine) && name.starts_with("EXP+"))
    expSymbols.push_back(slot);
  return {slot, true};
}
// Same as insert(name), but also marks the symbol as used in a regular object
// unless `file` is a bitcode file (a null `file` counts as regular).
std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
  auto result = insert(name);
  const bool fromBitcode = file && isa<BitcodeFile>(file);
  if (!fromBitcode)
    result.first->isUsedInRegularObj = true;
  return result;
}
// Records that `to` is the entry thunk for `from`; the pairs are consumed by
// initializeECThunks().
void SymbolTable::addEntryThunk(Symbol *from, Symbol *to) {
  entryThunks.emplace_back(from, to);
}
// Records `to` as the exit thunk for `from`, overwriting any earlier entry
// for the same symbol.
void SymbolTable::addExitThunk(Symbol *from, Symbol *to) {
  Symbol *&thunk = exitThunks[from];
  thunk = to;
}
void SymbolTable::initializeECThunks() {
if (!isArm64EC(ctx.config.machine))
return;
for (auto it : entryThunks) {
auto *to = dyn_cast<Defined>(it.second);
if (!to)
continue;
auto *from = dyn_cast<DefinedRegular>(it.first);
// We need to be able to add padding to the function and fill it with an
// offset to its entry thunks. To ensure that padding the function is
// feasible, functions are required to be COMDAT symbols with no offset.
if (!from || !from->getChunk()->isCOMDAT() ||
cast<DefinedRegular>(from)->getValue()) {
error("non COMDAT symbol '" + from->getName() + "' in hybrid map");
continue;
}
from->getChunk()->setEntryThunk(to);
}
for (ImportFile *file : ctx.importFileInstances) {
if (!file->impchkThunk)
continue;
Symbol *sym = exitThunks.lookup(file->thunkSym);
if (!sym)
sym = exitThunks.lookup(file->impECSym);
file->impchkThunk->exitThunk = dyn_cast_or_null<Defined>(sym);
}
// On ARM64EC, the __imp_ symbol references the auxiliary IAT, while the
// __imp_aux_ symbol references the regular IAT. However, x86_64 code expects
// both to reference the regular IAT, so adjust the symbol if necessary.
parallelForEach(ctx.objFileInstances, [&](ObjFile *file) {
if (file->getMachineType() != AMD64)
return;
for (auto &sym : file->getMutableSymbols()) {
auto impSym = dyn_cast_or_null<DefinedImportData>(sym);
if (impSym && impSym->file->impchkThunk && sym == impSym->file->impECSym)
sym = impSym->file->impSym;
}
});
}
// Registers an undefined reference to `name` coming from `f`. A new symbol,
// or an existing lazy one when `overrideLazy` is set, becomes Undefined. An
// existing lazy symbol is otherwise forced to load. Any other existing symbol
// is returned unchanged.
Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
                                  bool overrideLazy) {
  std::pair<Symbol *, bool> result = insert(name, f);
  Symbol *s = result.first;

  const bool makeUndefined = result.second || (s->isLazy() && overrideLazy);
  if (makeUndefined)
    replaceSymbol<Undefined>(s, name);
  else if (s->isLazy())
    forceLazy(s);
  return s;
}
// On ARM64EC, a function symbol may appear in both mangled and demangled forms:
// - ARM64EC archives contain only the mangled name, while the demangled symbol
//   is defined by the object file as an alias.
// - x86_64 archives contain only the demangled name (the mangled name is
//   usually defined by an object referencing the symbol as an alias to a guest
//   exit thunk).
// - ARM64EC import files contain both the mangled and demangled names for
//   thunks.
// If more than one archive defines the same function, this could lead
// to different libraries being used for the same function depending on how they
// are referenced. Avoid this by checking if the paired symbol is already
// defined before adding a symbol to the table.
//
// T is the lazy symbol class being added (LazyArchive or LazyObject). Returns
// true when the lazy symbol `name` from file `f` may be added to `symtab`, and
// false when its paired symbol already settles where the function comes from.
template <typename T>
bool checkLazyECPair(SymbolTable *symtab, StringRef name, InputFile *f) {
  if (name.starts_with("__imp_"))
    return true;

  std::string pairName;
  if (std::optional<std::string> mangledName =
          getArm64ECMangledFunctionName(name))
    pairName = std::move(*mangledName);
  else if (std::optional<std::string> demangledName =
               getArm64ECDemangledFunctionName(name))
    pairName = std::move(*demangledName);
  else
    // Neither a mangled nor a demangled counterpart exists for this name, so
    // there is no pair to conflict with. (Dereferencing the empty optional
    // here would be undefined behavior.)
    return true;

  Symbol *sym = symtab->find(pairName);
  if (!sym)
    return true;
  if (sym->pendingArchiveLoad)
    return false;
  if (auto u = dyn_cast<Undefined>(sym))
    return !u->weakAlias || u->isAntiDep;
  // If the symbol is lazy, allow it only if it originates from the same
  // archive.
  auto lazy = dyn_cast<T>(sym);
  return lazy && lazy->file == f;
}
// Registers the archive member symbol `sym` of archive `f`. A name not seen
// before becomes a LazyArchive symbol. If the name is currently undefined
// (and is not a non-EC weak alias, and has no load pending) the member is
// pulled in right away. On ARM64EC the symbol is dropped when
// checkLazyECPair() rejects it.
void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
  StringRef name = sym.getName();
  if (isArm64EC(ctx.config.machine) &&
      !checkLazyECPair<LazyArchive>(this, name, f))
    return;

  std::pair<Symbol *, bool> result = insert(name);
  Symbol *s = result.first;
  if (result.second) {
    replaceSymbol<LazyArchive>(s, f, sym);
    return;
  }

  auto *u = dyn_cast<Undefined>(s);
  if (!u)
    return;
  const bool nonECWeakAlias =
      u->weakAlias && !u->isECAlias(ctx.config.machine);
  if (nonECWeakAlias || s->pendingArchiveLoad)
    return;

  s->pendingArchiveLoad = true;
  f->addMember(sym);
}
// Registers symbol `n` of the lazy file `f`. A name not seen before becomes a
// LazyObject symbol. If the name is currently undefined (and is not a non-EC
// weak alias, and has no load pending) the file stops being lazy and is added
// immediately. On ARM64EC the symbol is dropped when checkLazyECPair()
// rejects it.
void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
  assert(f->lazy);
  if (isArm64EC(ctx.config.machine) && !checkLazyECPair<LazyObject>(this, n, f))
    return;

  std::pair<Symbol *, bool> result = insert(n, f);
  Symbol *s = result.first;
  if (result.second) {
    replaceSymbol<LazyObject>(s, f, n);
    return;
  }

  auto *u = dyn_cast<Undefined>(s);
  if (!u)
    return;
  const bool nonECWeakAlias =
      u->weakAlias && !u->isECAlias(ctx.config.machine);
  if (nonECWeakAlias || s->pendingArchiveLoad)
    return;

  s->pendingArchiveLoad = true;
  f->lazy = false;
  addFile(f);
}
// Registers symbol `n` provided by DLL file `f`. A name not seen before
// becomes a LazyDLLSymbol. If the name is currently undefined without a weak
// alias and without a pending load, the import is created right away.
void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym,
                                   StringRef n) {
  std::pair<Symbol *, bool> result = insert(n);
  Symbol *s = result.first;
  if (result.second) {
    replaceSymbol<LazyDLLSymbol>(s, f, sym, n);
    return;
  }

  auto *u = dyn_cast<Undefined>(s);
  if (u && !u->weakAlias && !s->pendingArchiveLoad) {
    s->pendingArchiveLoad = true;
    f->makeImport(sym);
  }
}
// Builds the "defined at" text for a symbol coming from a bitcode file: the
// module's source file name (when it has one) followed by the file itself.
static std::string getSourceLocationBitcode(BitcodeFile *file) {
  std::string text = "\n>>> defined at ";
  if (StringRef source = file->obj->getSourceFileName(); !source.empty()) {
    text += source.str();
    text += "\n>>> ";
  }
  text += toString(file);
  return text;
}
// Builds the "defined at" text for a symbol coming from an object file. The
// file/line is taken from the line information at `offset` in `sc` (when a
// chunk is given), falling back to the variable location recorded for `name`.
// Without either, only the file is named.
static std::string getSourceLocationObj(ObjFile *file, SectionChunk *sc,
                                        uint32_t offset, StringRef name) {
  std::optional<std::pair<StringRef, uint32_t>> fileLine;
  if (sc)
    fileLine = getFileLine(sc, offset);
  if (!fileLine)
    fileLine = file->getVariableLocation(name);

  std::string text = "\n>>> defined at ";
  if (fileLine)
    text += (fileLine->first + ":" + Twine(fileLine->second) + "\n>>> ").str();
  text += toString(file);
  return text;
}
// Returns the "defined at" text for a definition in `file`, picking the
// object-file or bitcode flavour as appropriate. A null file yields an empty
// string; any other file kind is described by the file alone.
static std::string getSourceLocation(InputFile *file, SectionChunk *sc,
                                     uint32_t offset, StringRef name) {
  if (file == nullptr)
    return std::string();

  auto *obj = dyn_cast<ObjFile>(file);
  if (obj)
    return getSourceLocationObj(obj, sc, offset, name);

  auto *bitcode = dyn_cast<BitcodeFile>(file);
  if (bitcode)
    return getSourceLocationBitcode(bitcode);

  return "\n>>> defined at " + toString(file);
}
// Reports a duplicate definition of `existing`, naming both the existing and
// the new definition site. The message looks like:
//
// lld-link: error: duplicate symbol: foo
// >>> defined at bar.c:30
// >>> bar.o
// >>> defined at baz.c:563
// >>> baz.o
//
// It is only a warning when ctx.config.forceMultiple is set.
void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile,
                                  SectionChunk *newSc,
                                  uint32_t newSectionOffset) {
  std::string msg = "duplicate symbol: " + toString(ctx, *existing);

  // Where the existing definition lives. Chunk and offset are only used for a
  // regular symbol that comes from an object file.
  auto *d = dyn_cast<DefinedRegular>(existing);
  if (d && isa<ObjFile>(d->getFile()))
    msg += getSourceLocation(d->getFile(), d->getChunk(), d->getValue(),
                             existing->getName());
  else
    msg += getSourceLocation(existing->getFile(), nullptr, 0, "");

  // Where the new, conflicting definition lives.
  msg += getSourceLocation(newFile, newSc, newSectionOffset,
                           existing->getName());

  if (!ctx.config.forceMultiple) {
    error(msg);
    return;
  }
  warn(msg);
}
// Adds the absolute symbol `n` described by the COFF symbol `sym`. A new,
// undefined or lazy symbol is replaced. An existing absolute symbol with a
// different value, or any existing non-COFF definition, is reported as a
// duplicate.
Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) {
  std::pair<Symbol *, bool> result = insert(n, nullptr);
  Symbol *s = result.first;
  s->isUsedInRegularObj = true;

  if (result.second || isa<Undefined>(s) || s->isLazy()) {
    replaceSymbol<DefinedAbsolute>(s, ctx, n, sym);
    return s;
  }
  if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
    if (da->getVA() != sym.getValue())
      reportDuplicate(s, nullptr);
    return s;
  }
  if (!isa<DefinedCOFF>(s))
    reportDuplicate(s, nullptr);
  return s;
}
// Adds the absolute symbol `n` with address `va`. A new, undefined or lazy
// symbol is replaced. An existing absolute symbol with a different address,
// or any existing non-COFF definition, is reported as a duplicate.
Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) {
  std::pair<Symbol *, bool> result = insert(n, nullptr);
  Symbol *s = result.first;
  s->isUsedInRegularObj = true;

  const bool replaceable = result.second || isa<Undefined>(s) || s->isLazy();
  if (replaceable) {
    replaceSymbol<DefinedAbsolute>(s, ctx, n, va);
  } else if (auto *da = dyn_cast<DefinedAbsolute>(s)) {
    const bool sameAddress = da->getVA() == va;
    if (!sameAddress)
      reportDuplicate(s, nullptr);
  } else if (!isa<DefinedCOFF>(s)) {
    reportDuplicate(s, nullptr);
  }
  return s;
}
// Adds the linker-synthesized symbol `n` backed by chunk `c`. A new,
// undefined or lazy symbol is replaced; an existing definition that is not a
// DefinedCOFF is reported as a duplicate.
Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) {
  std::pair<Symbol *, bool> result = insert(n, nullptr);
  Symbol *s = result.first;
  s->isUsedInRegularObj = true;

  if (result.second || isa<Undefined>(s) || s->isLazy()) {
    replaceSymbol<DefinedSynthetic>(s, n, c);
    return s;
  }
  if (!isa<DefinedCOFF>(s))
    reportDuplicate(s, nullptr);
  return s;
}
// Adds a regular (non-COMDAT, external) definition of `n` from file `f`. It
// takes over a new symbol, anything that is not a DefinedRegular, and an
// existing weak definition. A clash with a strong existing definition is
// reported as a duplicate unless the new definition is itself weak.
Symbol *SymbolTable::addRegular(InputFile *f, StringRef n,
                                const coff_symbol_generic *sym, SectionChunk *c,
                                uint32_t sectionOffset, bool isWeak) {
  std::pair<Symbol *, bool> result = insert(n, f);
  Symbol *s = result.first;

  const bool takeOver = result.second || !isa<DefinedRegular>(s) || s->isWeak;
  if (takeOver) {
    replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ false,
                                  /*IsExternal*/ true, sym, c, isWeak);
    return s;
  }
  if (!isWeak)
    reportDuplicate(s, f, c, sectionOffset);
  return s;
}
// Adds a COMDAT definition of `n` from file `f`. Returns the resulting
// DefinedRegular and whether this call created it (true) or an earlier
// DefinedRegular was kept (false). An earlier non-COMDAT definition is
// reported as a duplicate.
std::pair<DefinedRegular *, bool>
SymbolTable::addComdat(InputFile *f, StringRef n,
                       const coff_symbol_generic *sym) {
  std::pair<Symbol *, bool> result = insert(n, f);
  Symbol *s = result.first;

  if (!result.second) {
    if (auto *existing = dyn_cast<DefinedRegular>(s)) {
      if (!existing->isCOMDAT)
        reportDuplicate(s, f);
      return {existing, false};
    }
  }

  replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ true,
                                /*IsExternal*/ true, sym, nullptr);
  return {cast<DefinedRegular>(s), true};
}
// Adds a common symbol `n` of `size` bytes from file `f`. It replaces a new
// symbol, anything that is not a DefinedCOFF, and an existing common symbol
// that is strictly smaller. Other existing definitions are left alone.
Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
                               const coff_symbol_generic *sym, CommonChunk *c) {
  std::pair<Symbol *, bool> result = insert(n, f);
  Symbol *s = result.first;

  bool shouldReplace;
  if (result.second || !isa<DefinedCOFF>(s)) {
    shouldReplace = true;
  } else {
    auto *existing = dyn_cast<DefinedCommon>(s);
    shouldReplace = existing && size > existing->getSize();
  }

  if (shouldReplace)
    replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
  return s;
}
// Adds the import data symbol `n` from import file `f`. Returns the new
// symbol, or nullptr (after reporting a duplicate) when `n` is already
// defined by something other than an undefined or lazy symbol.
DefinedImportData *SymbolTable::addImportData(StringRef n, ImportFile *f,
                                              Chunk *&location) {
  std::pair<Symbol *, bool> result = insert(n, nullptr);
  Symbol *s = result.first;
  s->isUsedInRegularObj = true;

  if (!result.second && !isa<Undefined>(s) && !s->isLazy()) {
    reportDuplicate(s, f);
    return nullptr;
  }
  replaceSymbol<DefinedImportData>(s, n, f, location);
  return cast<DefinedImportData>(s);
}
// Adds the import thunk symbol `name` for import data `id`, backed by
// `chunk`. Returns the new symbol, or nullptr (after reporting a duplicate
// against id->file) when `name` is already defined by something other than an
// undefined or lazy symbol.
Defined *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
                                     ImportThunkChunk *chunk) {
  std::pair<Symbol *, bool> result = insert(name, nullptr);
  Symbol *s = result.first;
  s->isUsedInRegularObj = true;

  if (!result.second && !isa<Undefined>(s) && !s->isLazy()) {
    reportDuplicate(s, id->file);
    return nullptr;
  }
  replaceSymbol<DefinedImportThunk>(s, ctx, name, id, chunk);
  return cast<Defined>(s);
}
// If the libcall `name` is currently provided by a lazy symbol whose backing
// archive member or object is bitcode, adds an undefined reference to it.
void SymbolTable::addLibcall(StringRef name) {
  Symbol *sym = findUnderscore(name);
  if (!sym)
    return;

  bool backedByBitcode = false;
  if (auto *archiveSym = dyn_cast<LazyArchive>(sym))
    backedByBitcode = isBitcode(archiveSym->getMemberBuffer());
  else if (auto *objectSym = dyn_cast<LazyObject>(sym))
    backedByBitcode = isBitcode(objectSym->file->mb);

  if (backedByBitcode)
    addUndefined(sym->getName());
}
// Returns the chunks of all object files registered in the linker context,
// concatenated in file order.
std::vector<Chunk *> SymbolTable::getChunks() const {
  std::vector<Chunk *> all;
  for (ObjFile *file : ctx.objFileInstances)
    for (Chunk *chunk : file->getChunks())
      all.push_back(chunk);
  return all;
}
// Returns the symbol registered under exactly `name`, or nullptr if there is
// none.
Symbol *SymbolTable::find(StringRef name) const {
  auto it = symMap.find(CachedHashStringRef(name));
  return it == symMap.end() ? nullptr : it->second;
}
// Like find(), but on i386 targets the name is looked up with a leading
// underscore prepended.
Symbol *SymbolTable::findUnderscore(StringRef name) const {
  return ctx.config.machine == I386 ? find(("_" + name).str()) : find(name);
}
// Collects every symbol whose name begins with `prefix`, where the match is
// also tried with the first character of `prefix`, of the symbol name, or of
// both, ignored.
std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef prefix) {
  std::vector<Symbol *> matches;
  for (const auto &entry : symMap) {
    const StringRef name = entry.first.val();
    const bool looselyMatches =
        name.starts_with(prefix) || name.starts_with(prefix.drop_front()) ||
        name.drop_front().starts_with(prefix) ||
        name.drop_front().starts_with(prefix.drop_front());
    if (looselyMatches)
      matches.push_back(entry.second);
  }
  return matches;
}
// Finds a symbol for the possibly undecorated `name`. An exact match wins
// (for an undefined symbol only via its weak alias); otherwise the known
// decorated forms of `name` are tried in a fixed order. Returns nullptr when
// nothing matches.
Symbol *SymbolTable::findMangle(StringRef name) {
  if (Symbol *exact = find(name)) {
    auto *undef = dyn_cast<Undefined>(exact);
    if (!undef)
      return exact;
    // We're specifically looking for weak aliases that ultimately resolve to
    // defined symbols, hence the call to getWeakAlias() instead of just using
    // the weakAlias member variable. This matches link.exe's behavior.
    if (Symbol *alias = undef->getWeakAlias())
      return alias;
  }

  // Efficient fuzzy string lookup is impossible with a hash table, so walk the
  // symbol table once to gather every possible match, then test each
  // candidate mangling against that short list.
  const std::vector<Symbol *> candidates = getSymsWithPrefix(name);
  auto firstWithPrefix = [&candidates](const Twine &t) -> Symbol * {
    const std::string prefix = t.str();
    for (Symbol *candidate : candidates) {
      if (candidate->getName().starts_with(prefix))
        return candidate;
    }
    return nullptr;
  };

  // For non-x86, just look for C++ functions.
  if (ctx.config.machine != I386)
    return firstWithPrefix("?" + name + "@@Y");

  if (!name.starts_with("_"))
    return nullptr;
  const StringRef bare = name.substr(1);

  // x86 stdcall function.
  if (Symbol *stdcall = firstWithPrefix(name + "@"))
    return stdcall;
  // x86 fastcall function.
  if (Symbol *fastcall = firstWithPrefix("@" + bare + "@"))
    return fastcall;
  // x86 vectorcall function.
  if (Symbol *vectorcall = firstWithPrefix(bare + "@@"))
    return vectorcall;
  // x86 C++ non-member function.
  return firstWithPrefix("?" + bare + "@@Y");
}
// Adds an undefined reference to `name` that is not tied to any input file
// and does not override an existing lazy symbol.
Symbol *SymbolTable::addUndefined(StringRef name) {
  return addUndefined(name, /*f=*/nullptr, /*overrideLazy=*/false);
}
void SymbolTable::compileBitcodeFiles() {
ltoCompilationDone = true;
if (ctx.bitcodeFileInstances.empty())
return;
llvm::TimeTraceScope timeScope("Compile bitcode");
ScopedTimer t(ctx.ltoTimer);
lto.reset(new BitcodeCompiler(ctx));
for (BitcodeFile *f : ctx.bitcodeFileInstances)
lto->add(*f);
for (InputFile *newObj : lto->compile()) {
ObjFile *obj = cast<ObjFile>(newObj);
obj->parse();
ctx.objFileInstances.push_back(obj);
}
}
} // namespace lld::coff