Files
clang-p2996/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp
Nick Kledzik a2d602560b [mach-o] Make anonymous atom out of section content before any symbol
In sections that are broken into atoms at symbols, if the first symbol in the
section is not at the start of the section, then make an anonymous atom for
the section content that is before the first symbol.

llvm-svn: 210142
2014-06-04 00:34:27 +00:00

435 lines
17 KiB
C++

//===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file Converts from in-memory normalized mach-o to in-memory Atoms.
///
/// +------------+
/// | normalized |
/// +------------+
/// |
/// |
/// v
/// +-------+
/// | Atoms |
/// +-------+
#include "MachONormalizedFile.h"
#include "MachONormalizedFileBinaryUtils.h"
#include "File.h"
#include "Atoms.h"
#include "lld/Core/Error.h"
#include "lld/Core/LLVM.h"
#include "llvm/Support/MachO.h"
using namespace llvm::MachO;
using namespace lld::mach_o::normalized;
namespace lld {
namespace mach_o {
namespace { // anonymous
/// Maps a mach-o section onto the ContentType used for the atoms made from it.
///
/// The section's (segment name, section name, section type) triple is looked
/// up in a fixed table, where an empty name matches any name.  A section that
/// matches no row but has S_ATTR_PURE_INSTRUCTIONS set is treated as code;
/// anything else yields typeUnknown.
DefinedAtom::ContentType atomTypeFromSection(const Section &section) {
  struct SectionToAtomType {
    StringRef segmentName;
    StringRef sectionName;
    SectionType sectionType;
    DefinedAtom::ContentType atomType;
  };

#define ENTRY(seg, sect, type, atomType) \
  {seg, sect, type, DefinedAtom::atomType }
  // Rows are tried in order and the first match wins, so specific rows must
  // precede the wildcard rows that share their section type.
  // NOTE(review): "___got" below has three underscores; "__got" may have been
  // intended.  The wildcard row right after it maps every
  // S_NON_LAZY_SYMBOL_POINTERS section to typeGOT anyway, so the result is
  // the same either way.
  static const SectionToAtomType knownSections[] = {
    ENTRY("__TEXT", "__text",           S_REGULAR,          typeCode),
    ENTRY("__TEXT", "__cstring",        S_CSTRING_LITERALS, typeCString),
    ENTRY("",       "",                 S_CSTRING_LITERALS, typeCString),
    ENTRY("__TEXT", "__ustring",        S_REGULAR,          typeUTF16String),
    ENTRY("__TEXT", "__const",          S_REGULAR,          typeConstant),
    ENTRY("__TEXT", "__eh_frame",       S_COALESCED,        typeCFI),
    ENTRY("__TEXT", "__literal4",       S_4BYTE_LITERALS,   typeLiteral4),
    ENTRY("__TEXT", "__literal8",       S_8BYTE_LITERALS,   typeLiteral8),
    ENTRY("__TEXT", "__literal16",      S_16BYTE_LITERALS,  typeLiteral16),
    ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR,          typeLSDA),
    ENTRY("__DATA", "__data",           S_REGULAR,          typeData),
    ENTRY("__DATA", "__const",          S_REGULAR,          typeConstData),
    ENTRY("__DATA", "__cfstring",       S_REGULAR,          typeCFString),
    ENTRY("__DATA", "__mod_init_func",  S_MOD_INIT_FUNC_POINTERS,
                                                            typeInitializerPtr),
    ENTRY("__DATA", "__mod_term_func",  S_MOD_TERM_FUNC_POINTERS,
                                                            typeTerminatorPtr),
    ENTRY("__DATA", "___got",           S_NON_LAZY_SYMBOL_POINTERS,
                                                            typeGOT),
    ENTRY("",       "",                 S_NON_LAZY_SYMBOL_POINTERS,
                                                            typeGOT),
    ENTRY("__LD",   "__compact_unwind", S_REGULAR,
                                                            typeCompactUnwindInfo),
    ENTRY("",       "",                 S_REGULAR,          typeUnknown)
  };
#undef ENTRY

  for (const SectionToAtomType &row : knownSections) {
    // The typeUnknown row only marks the end of the table; it is never
    // matched against the section.
    if (row.atomType == DefinedAtom::typeUnknown)
      break;

    const bool typeMatches = (row.sectionType == section.type);
    const bool segmentMatches = row.segmentName.empty() ||
                                row.segmentName.equals(section.segmentName);
    const bool sectionMatches = row.sectionName.empty() ||
                                row.sectionName.equals(section.sectionName);
    if (typeMatches && segmentMatches && sectionMatches)
      return row.atomType;
  }

  // Not in the table: fall back to the section attributes to spot code.
  if (section.attributes & S_ATTR_PURE_INSTRUCTIONS)
    return DefinedAtom::typeCode;

  return DefinedAtom::typeUnknown;
}
/// The strategies processSection() can use to split a section's content
/// into atoms.  sectionParseInfo() picks one per ContentType.
enum AtomizeModel {
// One atom per symbol defined in the section (see processSymboledSection).
atomizeAtSymbols,
// Atoms of a fixed byte size, taken from the type's size multiple.
atomizeFixedSize,
// Atoms the size of a pointer: 8 bytes on 64-bit archs, otherwise 4.
atomizePointerSize,
// One atom per zero terminated c-string.
atomizeUTF8,
// One atom per UTF16 string terminated by a zero 16-bit unit.
atomizeUTF16,
// One atom per dwarf unwind CFI (FDE or CIE), sized by its 32-bit length.
atomizeCFI,
// One atom per compact unwind entry: 32 bytes on 64-bit archs, otherwise 20.
atomizeCU
};
/// Returns info on how to atomize a section of the specified ContentType.
///
/// \param atomType      content type of the section being parsed.
/// \param sizeMultiple  [out] the section's content size must be a multiple
///                      of this; it is also the atom size for
///                      atomizeFixedSize sections.
/// \param scope         [out] scope given to atoms made from the section.
/// \param merge         [out] merge behavior given to those atoms.
/// \param atomizeModel  [out] strategy used to split the section into atoms.
///
/// Content types that have no row in the table are atomized at symbols with
/// global scope and no merging.
void sectionParseInfo(DefinedAtom::ContentType atomType,
                      unsigned int &sizeMultiple,
                      DefinedAtom::Scope &scope,
                      DefinedAtom::Merge &merge,
                      AtomizeModel &atomizeModel) {
  struct ParseInfo {
    DefinedAtom::ContentType  atomType;
    unsigned int              sizeMultiple;
    DefinedAtom::Scope        scope;
    DefinedAtom::Merge        merge;
    AtomizeModel              atomizeModel;
  };

#define ENTRY(type, size, scope, merge, model) \
  {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model }
  // Exactly one row per content type.  The lookup below returns the first
  // match, so a second row for the same type would never be used.  (A
  // shadowed second typeCFI row that disagreed with the first one used to
  // live here and has been dropped.)
  static const ParseInfo parseInfo[] = {
    ENTRY(typeCode,              1, scopeGlobal,          mergeNo,
                                                            atomizeAtSymbols),
    ENTRY(typeData,              1, scopeGlobal,          mergeNo,
                                                            atomizeAtSymbols),
    ENTRY(typeConstData,         1, scopeGlobal,          mergeNo,
                                                            atomizeAtSymbols),
    ENTRY(typeZeroFill,          1, scopeGlobal,          mergeNo,
                                                            atomizeAtSymbols),
    ENTRY(typeConstant,          1, scopeGlobal,          mergeNo,
                                                            atomizeAtSymbols),
    ENTRY(typeCString,           1, scopeLinkageUnit,     mergeByContent,
                                                            atomizeUTF8),
    ENTRY(typeUTF16String,       1, scopeLinkageUnit,     mergeByContent,
                                                            atomizeUTF16),
    ENTRY(typeCFI,               1, scopeTranslationUnit, mergeNo,
                                                            atomizeCFI),
    ENTRY(typeLiteral4,          4, scopeLinkageUnit,     mergeByContent,
                                                            atomizeFixedSize),
    ENTRY(typeLiteral8,          8, scopeLinkageUnit,     mergeByContent,
                                                            atomizeFixedSize),
    ENTRY(typeLiteral16,        16, scopeLinkageUnit,     mergeByContent,
                                                            atomizeFixedSize),
    ENTRY(typeCFString,         16, scopeLinkageUnit,     mergeByContent,
                                                            atomizeFixedSize),
    ENTRY(typeInitializerPtr,    4, scopeTranslationUnit, mergeNo,
                                                            atomizePointerSize),
    ENTRY(typeTerminatorPtr,     4, scopeTranslationUnit, mergeNo,
                                                            atomizePointerSize),
    ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo,
                                                            atomizeCU),
    ENTRY(typeGOT,               4, scopeLinkageUnit,     mergeByContent,
                                                            atomizePointerSize),
    ENTRY(typeUnknown,           1, scopeGlobal,          mergeNo,
                                                            atomizeAtSymbols)
  };
#undef ENTRY

  for (const ParseInfo &info : parseInfo) {
    if (info.atomType != atomType)
      continue;
    sizeMultiple = info.sizeMultiple;
    scope        = info.scope;
    merge        = info.merge;
    atomizeModel = info.atomizeModel;
    return;
  }

  // Unknown type is atomized by symbols.
  sizeMultiple = 1;
  scope        = DefinedAtom::scopeGlobal;
  merge        = DefinedAtom::mergeNo;
  atomizeModel = atomizeAtSymbols;
}
/// Translates the scope bits of a mach-o symbol into an lld Atom::Scope.
///
/// N_EXT alone is global, N_PEXT together with N_EXT is linkage-unit scope,
/// and no bits set is translation-unit scope.  Any other combination is
/// treated as unreachable.
Atom::Scope atomScope(uint8_t scope) {
  if (scope == N_EXT)
    return Atom::scopeGlobal;
  if (scope == (N_PEXT | N_EXT))
    return Atom::scopeLinkageUnit;
  if (scope == 0)
    return Atom::scopeTranslationUnit;
  llvm_unreachable("unknown scope value!");
}
/// Appends to \p outSyms a pointer to every symbol in \p inSymbols that is
/// defined in the section with index \p sectionIndex.
///
/// The pointers refer to elements of \p inSymbols, so that vector must
/// outlive \p outSyms.
void appendSymbolsInSection(const std::vector<Symbol> &inSymbols,
                            uint32_t sectionIndex,
                            SmallVector<const Symbol *, 64> &outSyms) {
  for (const Symbol &candidate : inSymbols) {
    // Only symbols of type N_SECT are definitions that live in a section.
    const bool isSectionDefinition = ((candidate.type & N_TYPE) == N_SECT);
    if (isSectionDefinition && candidate.sect == sectionIndex)
      outSyms.push_back(&candidate);
  }
}
/// Adds to \p file one atom covering the address range
/// [\p symbolAddr, \p nextSymbolAddr) of \p section.
///
/// Zero-fill sections get a zero-fill atom of that size.  For all other
/// sections the atom's content is the matching slice of the section content;
/// sections of unknown type are added as atoms in a custom section named
/// "segment/section".  An empty \p symbolName makes an anonymous atom.
void atomFromSymbol(DefinedAtom::ContentType atomType, const Section &section,
                    MachOFile &file, uint64_t symbolAddr, StringRef symbolName,
                    bool symbolWeakDef, Atom::Scope symbolScope,
                    uint64_t nextSymbolAddr, bool copyRefs) {
  // The mach-o symbol table does not record a size for each symbol.  Instead
  // the size is the distance from this symbol to the next one.
  const uint64_t atomSize = nextSymbolAddr - symbolAddr;

  if (section.type == llvm::MachO::S_ZEROFILL) {
    file.addZeroFillDefinedAtom(symbolName, symbolScope, atomSize, copyRefs);
    return;
  }

  const uint64_t offsetInSection = symbolAddr - section.address;
  ArrayRef<uint8_t> atomBytes =
      section.content.slice(offsetInSection, atomSize);

  DefinedAtom::Merge merge = DefinedAtom::mergeNo;
  if (symbolWeakDef)
    merge = DefinedAtom::mergeAsWeak;

  if (atomType != DefinedAtom::typeUnknown) {
    file.addDefinedAtom(symbolName, symbolScope, atomType, merge, atomBytes,
                        copyRefs);
    return;
  }

  // Mach-O needs a segment and section name.  Concatenate those two with a
  // '/' separator (e.g. "seg/sect") to fit into the lld model of just a
  // section name.
  std::string segmentAndSection =
      section.segmentName.str() + "/" + section.sectionName.str();
  // NOTE(review): the last argument is always true here rather than copyRefs,
  // presumably because segmentAndSection is a local that does not outlive
  // this call -- confirm against MachOFile::addDefinedAtomInCustomSection.
  file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType, merge,
                                     atomBytes, segmentAndSection, true);
}
/// Breaks \p section into atoms at the addresses of the symbols defined in it.
///
/// Each symbol becomes an atom that runs up to the next symbol, or to the end
/// of the section for the last symbol.  Content that precedes the first
/// symbol becomes an anonymous atom, and so does the entire content of a
/// section that has content but no symbols.  A section with neither symbols
/// nor content produces no atoms.
///
/// \returns always a success error_code; the return type matches the other
///          section processing routines.
error_code processSymboledSection(DefinedAtom::ContentType atomType,
                                  const Section &section,
                                  const NormalizedFile &normalizedFile,
                                  MachOFile &file, bool copyRefs) {
  // Find section's index.  Symbols refer to sections by their 1-based
  // position in normalizedFile.sections.
  uint32_t sectIndex = 1;
  for (auto &sect : normalizedFile.sections) {
    if (&sect == &section)
      break;
    ++sectIndex;
  }

  // Find all symbols in this section.
  SmallVector<const Symbol *, 64> symbols;
  appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols);
  appendSymbolsInSection(normalizedFile.localSymbols,  sectIndex, symbols);

  // Sort symbols by address.
  std::sort(symbols.begin(), symbols.end(),
            [](const Symbol *lhs, const Symbol *rhs) -> bool {
              return lhs->value < rhs->value;
            });

  // If section has no symbols and no content, there are no atoms.
  if (symbols.empty() && section.content.empty())
    return error_code();

  const uint64_t sectionEnd = section.address + section.content.size();

  if (symbols.empty()) {
    // Section has content but no symbols: put all of it in one anonymous
    // atom.  (Calling symbols.front() here would be undefined behavior.)
    atomFromSymbol(atomType, section, file, section.address, StringRef(),
                   false, Atom::scopeTranslationUnit, sectionEnd, copyRefs);
    return error_code();
  }

  const uint64_t firstSymbolAddr = symbols.front()->value;
  if (firstSymbolAddr != section.address) {
    // Section has anonymous content before first symbol.
    atomFromSymbol(atomType, section, file, section.address, StringRef(),
                   false, Atom::scopeTranslationUnit, firstSymbolAddr,
                   copyRefs);
  }

  // Every symbol's atom ends where the next symbol starts; the last one ends
  // at the end of the section.
  for (size_t i = 0, count = symbols.size(); i != count; ++i) {
    const Symbol *sym = symbols[i];
    uint64_t nextAddr = sectionEnd;
    if (i + 1 != count)
      nextAddr = symbols[i + 1]->value;
    const bool isWeakDef = (sym->desc & N_WEAK_DEF) != 0;
    atomFromSymbol(atomType, section, file, sym->value, sym->name, isWeakDef,
                   atomScope(sym->scope), nextAddr, copyRefs);
  }
  return error_code();
}
/// Converts the content of one section into atoms that are added to \p file.
///
/// sectionParseInfo() selects how the section is split: at symbols, into
/// fixed or pointer sized pieces, into zero terminated strings, into dwarf
/// CFIs, or into compact unwind entries.  All atoms made here, other than
/// those of symbol-atomized sections, are anonymous.
///
/// \returns a success error_code, or a dynamic error describing the section
///          if its size is not a multiple of the required size, a string is
///          not zero terminated, or an atom would extend past the end of the
///          section content.
error_code processSection(DefinedAtom::ContentType atomType,
                          const Section &section,
                          const NormalizedFile &normalizedFile,
                          MachOFile &file, bool copyRefs) {
  const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
  const bool swap = !MachOLinkingContext::isHostEndian(normalizedFile.arch);

  // Get info on how to atomize section.
  unsigned int       sizeMultiple;
  DefinedAtom::Scope scope;
  DefinedAtom::Merge merge;
  AtomizeModel       atomizeModel;
  sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel);

  // Validate section size.
  if ((section.content.size() % sizeMultiple) != 0)
    return make_dynamic_error_code(Twine("Section ") + section.segmentName
                                   + "/" + section.sectionName
                                   + " has size ("
                                   + Twine(section.content.size())
                                   + ") which is not a multiple of "
                                   + Twine(sizeMultiple) );

  if (atomizeModel == atomizeAtSymbols) {
    // Break section up into atoms at the symbols defined in it.
    return processSymboledSection(atomType, section, normalizedFile, file,
                                  copyRefs);
  }

  const unsigned int e = section.content.size();
  for (unsigned int offset = 0; offset != e;) {
    // Bytes from offset to the end of the section; never zero in the loop.
    const unsigned int remaining = e - offset;
    // Stays zero when no terminator is found, which is reported below.
    unsigned int size = 0;
    switch (atomizeModel) {
    case atomizeFixedSize:
      // Break section up into atoms each with a fixed size.
      size = sizeMultiple;
      break;
    case atomizePointerSize:
      // Break section up into atoms each the size of a pointer.
      size = is64 ? 8 : 4;
      break;
    case atomizeUTF8:
      // Break section up into zero terminated c-strings.  The scan starts at
      // the current offset, not at the start of the section.
      for (unsigned int i = 0; i < remaining; ++i) {
        if (section.content[offset + i] == 0) {
          size = i + 1;
          break;
        }
      }
      break;
    case atomizeUTF16:
      // Break section up into zero terminated UTF16 strings.  Only whole
      // 16-bit units are examined, so a trailing odd byte is never read.
      for (unsigned int i = 0; i + 1 < remaining; i += 2) {
        if ((section.content[offset + i] == 0) &&
            (section.content[offset + i + 1] == 0)) {
          size = i + 2;
          break;
        }
      }
      break;
    case atomizeCFI: {
      // Break section up into dwarf unwind CFIs (FDE or CIE).  Each starts
      // with a 32-bit length that does not count the length field itself.
      // The sum is done in 64 bits so that a huge length cannot wrap.
      uint64_t cfiSize = 0;
      if (remaining >= 4) {
        const uint32_t *cfi =
            reinterpret_cast<const uint32_t *>(&section.content[offset]);
        cfiSize = static_cast<uint64_t>(read32(swap, *cfi)) + 4;
      }
      // cfiSize is still zero if the length field itself was truncated.
      if (cfiSize == 0 || cfiSize > remaining) {
        return make_dynamic_error_code(Twine(Twine("Section ")
                                       + section.segmentName
                                       + "/" + section.sectionName
                                       + " is malformed.  Size of CFI "
                                       "starting at offset ("
                                       + Twine(offset)
                                       + ") is past end of section."));
      }
      size = static_cast<unsigned int>(cfiSize);
      break;
    }
    case atomizeCU:
      // Break section up into compact unwind entries.
      size = is64 ? 32 : 20;
      break;
    case atomizeAtSymbols:
      // Handled before the loop; listed only to keep the switch exhaustive.
      break;
    }
    if (size == 0) {
      return make_dynamic_error_code(Twine("Section ") + section.segmentName
                                     + "/" + section.sectionName
                                     + " is malformed.  The last atom is "
                                     "not zero terminated.");
    }
    // Pointer sized and compact unwind atoms can be larger than the size
    // multiple validated above, so make sure this atom fits in what is left.
    if (size > remaining) {
      return make_dynamic_error_code(Twine("Section ") + section.segmentName
                                     + "/" + section.sectionName
                                     + " is malformed.  The atom starting "
                                     "at offset ("
                                     + Twine(offset)
                                     + ") extends past end of section.");
    }
    ArrayRef<uint8_t> byteContent = section.content.slice(offset, size);
    file.addDefinedAtom(StringRef(), scope, atomType, merge, byteContent,
                        copyRefs);
    offset += size;
  }
  return error_code();
}
/// Builds a MachOFile of atoms from a normalized mach-o object file.
///
/// Every section is atomized first, then each entry of undefinedSymbols
/// becomes either an undefined atom or a tentative definition atom.
///
/// \returns the new file, or the first error reported while processing a
///          section.
ErrorOr<std::unique_ptr<lld::File>>
normalizedObjectToAtoms(const NormalizedFile &normalizedFile, StringRef path,
                        bool copyRefs) {
  std::unique_ptr<MachOFile> machoFile(new MachOFile(path));

  // Create atoms from each section.
  for (const Section &section : normalizedFile.sections) {
    const DefinedAtom::ContentType contentType = atomTypeFromSection(section);
    error_code ec = processSection(contentType, section, normalizedFile,
                                   *machoFile, copyRefs);
    if (ec)
      return ec;
  }

  // Create atoms from undefined symbols.
  for (const Symbol &undef : normalizedFile.undefinedSymbols) {
    // Undefined symbols with n_value != 0 are actually tentative definitions.
    const bool isTentativeDef = !(undef.value == Hex64(0));
    if (isTentativeDef) {
      machoFile->addTentativeDefAtom(undef.name, atomScope(undef.scope),
                                     undef.value,
                                     DefinedAtom::Alignment(undef.desc >> 8),
                                     copyRefs);
    } else {
      machoFile->addUndefinedAtom(undef.name, copyRefs);
    }
  }

  return std::unique_ptr<File>(std::move(machoFile));
}
} // anonymous namespace
namespace normalized {
/// Entry point for converting an in-memory normalized mach-o file to atoms.
///
/// Only MH_OBJECT files are handled; any other file type is treated as
/// unreachable.
ErrorOr<std::unique_ptr<lld::File>>
normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
                  bool copyRefs) {
  if (normalizedFile.fileType == MH_OBJECT)
    return normalizedObjectToAtoms(normalizedFile, path, copyRefs);

  llvm_unreachable("unhandled MachO file type!");
}
} // namespace normalized
} // namespace mach_o
} // namespace lld