[BOLT] Abort on out-of-section symbols in GOT (#100801)
This patch aborts BOLT execution if it finds out-of-section (section end) symbol in GOT table. In order to handle such situations properly in future, we would need to have an arch-dependent way to analyze relocations or its sequences, e.g., for ARM it would probably be ADRP + LDR analysis in order to get GOT entry address. Currently, it is also challenging because GOT-related relocation symbols are replaced to __BOLT_got_zero. Anyway, it seems to be quite a rare case, which seems to be only? related to static binaries. For the most part, it seems that it should be handled on the linker stage, since static binary should not have GOT table at all. LLD linker with relaxations enabled would replace instruction addresses from GOT directly to target symbols, which eliminates the problem. Anyway, in order to achieve detection of such cases, this patch fixes a few things in BOLT: 1. For the end symbols, we're now using the section provided by ELF binary. Previously it would be tied with a wrong section found by symbol address. 2. The end symbols would have limited registration we would only add them in name->data GlobalSymbols map, since using address->data BinaryDataMap map would likely be impossible due to address duality of such symbols. 3. The outdated BD->getSection (currently returning refence, not pointer) check in postProcessSymbolTable is replaced by getSize check in order to allow zero-sized top-level symbols if they are located in zero-sized sections. For the most part, such things could only be found in tests, but I don't see a reason not to handle such cases. 4. Updated section-end-sym test and removed x86_64 requirement since there is no reason for this (tested on aarch64 linux) The test was provided by peterwaller-arm (thank you) in #100096 and slightly modified by me.
This commit is contained in:
committed by
GitHub
parent
097ddd3565
commit
a4900f0d93
@@ -911,7 +911,8 @@ public:
|
||||
/// of \p Flags.
|
||||
MCSymbol *registerNameAtAddress(StringRef Name, uint64_t Address,
|
||||
uint64_t Size, uint16_t Alignment,
|
||||
unsigned Flags = 0);
|
||||
unsigned Flags = 0,
|
||||
BinarySection *Section = NULL);
|
||||
|
||||
/// Return BinaryData registered at a given \p Address or nullptr if no
|
||||
/// global symbol was registered at the location.
|
||||
|
||||
@@ -1056,18 +1056,28 @@ void BinaryContext::adjustCodePadding() {
|
||||
MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
|
||||
uint64_t Size,
|
||||
uint16_t Alignment,
|
||||
unsigned Flags) {
|
||||
unsigned Flags,
|
||||
BinarySection *Section) {
|
||||
// Register the name with MCContext.
|
||||
MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
|
||||
BinaryData *BD;
|
||||
|
||||
// Register out of section symbols only in GlobalSymbols map
|
||||
if (Section && Section->getEndAddress() == Address) {
|
||||
BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
|
||||
*Section, Flags);
|
||||
GlobalSymbols[Name] = BD;
|
||||
return Symbol;
|
||||
}
|
||||
|
||||
auto GAI = BinaryDataMap.find(Address);
|
||||
BinaryData *BD;
|
||||
if (GAI == BinaryDataMap.end()) {
|
||||
ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
|
||||
BinarySection &Section =
|
||||
SectionOrErr ? SectionOrErr.get() : absoluteSection();
|
||||
BinarySection &SectionRef = Section ? *Section
|
||||
: SectionOrErr ? SectionOrErr.get()
|
||||
: absoluteSection();
|
||||
BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
|
||||
Section, Flags);
|
||||
SectionRef, Flags);
|
||||
GAI = BinaryDataMap.emplace(Address, BD).first;
|
||||
GlobalSymbols[Name] = BD;
|
||||
updateObjectNesting(GAI);
|
||||
@@ -1402,7 +1412,7 @@ void BinaryContext::postProcessSymbolTable() {
|
||||
if ((BD->getName().starts_with("SYMBOLat") ||
|
||||
BD->getName().starts_with("DATAat")) &&
|
||||
!BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
|
||||
BD->getSection()) {
|
||||
BD->getSection().getSize()) {
|
||||
this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
|
||||
<< "\n";
|
||||
Valid = false;
|
||||
|
||||
@@ -955,13 +955,13 @@ void RewriteInstance::discoverFileObjects() {
|
||||
uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
|
||||
uint64_t SymbolAlignment = Symbol.getAlignment();
|
||||
|
||||
auto registerName = [&](uint64_t FinalSize) {
|
||||
auto registerName = [&](uint64_t FinalSize, BinarySection *Section = NULL) {
|
||||
// Register names even if it's not a function, e.g. for an entry point.
|
||||
BC->registerNameAtAddress(UniqueName, SymbolAddress, FinalSize,
|
||||
SymbolAlignment, SymbolFlags);
|
||||
SymbolAlignment, SymbolFlags, Section);
|
||||
if (!AlternativeName.empty())
|
||||
BC->registerNameAtAddress(AlternativeName, SymbolAddress, FinalSize,
|
||||
SymbolAlignment, SymbolFlags);
|
||||
SymbolAlignment, SymbolFlags, Section);
|
||||
};
|
||||
|
||||
section_iterator Section =
|
||||
@@ -986,12 +986,25 @@ void RewriteInstance::discoverFileObjects() {
|
||||
<< " for function\n");
|
||||
|
||||
if (SymbolAddress == Section->getAddress() + Section->getSize()) {
|
||||
ErrorOr<BinarySection &> SectionOrError =
|
||||
BC->getSectionForAddress(Section->getAddress());
|
||||
|
||||
// Skip symbols from invalid sections
|
||||
if (!SectionOrError) {
|
||||
BC->errs() << "BOLT-WARNING: " << UniqueName << " (0x"
|
||||
<< Twine::utohexstr(SymbolAddress)
|
||||
<< ") does not have any section\n";
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(SymbolSize == 0 &&
|
||||
"unexpect non-zero sized symbol at end of section");
|
||||
LLVM_DEBUG(
|
||||
dbgs()
|
||||
<< "BOLT-DEBUG: rejecting as symbol points to end of its section\n");
|
||||
registerName(SymbolSize);
|
||||
LLVM_DEBUG({
|
||||
dbgs() << "BOLT-DEBUG: rejecting as symbol " << UniqueName
|
||||
<< " points to end of " << SectionOrError->getName()
|
||||
<< " section\n";
|
||||
});
|
||||
registerName(SymbolSize, &SectionOrError.get());
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -2623,6 +2636,30 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
|
||||
}
|
||||
}
|
||||
|
||||
if (Relocation::isGOT(RType) && !Relocation::isTLS(RType)) {
|
||||
auto exitOnGotEndSymol = [&](StringRef Name) {
|
||||
BC->errs() << "BOLT-ERROR: GOT table contains currently unsupported "
|
||||
"section end symbol "
|
||||
<< Name << "\n";
|
||||
exit(1);
|
||||
};
|
||||
|
||||
if (SymbolIter != InputFile->symbol_end() && ReferencedSection) {
|
||||
if (cantFail(SymbolIter->getAddress()) ==
|
||||
ReferencedSection->getEndAddress())
|
||||
exitOnGotEndSymol(cantFail(SymbolIter->getName()));
|
||||
} else {
|
||||
// If no section and symbol are provided by relocation, try to find the
|
||||
// symbol by its name, including the possibility that the symbol is local.
|
||||
BinaryData *BD = BC->getBinaryDataByName(SymbolName);
|
||||
if (!BD && NR.getUniquifiedNameCount(SymbolName) == 1)
|
||||
BD = BC->getBinaryDataByName(NR.getUniqueName(SymbolName, 1));
|
||||
|
||||
if ((BD && BD->getAddress() == BD->getSection().getEndAddress()))
|
||||
exitOnGotEndSymol(BD->getName());
|
||||
}
|
||||
}
|
||||
|
||||
if (!ReferencedSection)
|
||||
ReferencedSection = BC->getSectionForAddress(SymbolAddress);
|
||||
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
SECTIONS {
|
||||
PROVIDE (__executable_start = SEGMENT_START("text-segment", 0x400000)); . = SEGMENT_START("text-segment", 0x400000) + SIZEOF_HEADERS;
|
||||
.data : { *(.data) *(.array) }
|
||||
.text : { *(.text) }
|
||||
.got : { *(.got) *(.igot) }
|
||||
}
|
||||
28
bolt/test/AArch64/got_end_of_section_symbol.s
Normal file
28
bolt/test/AArch64/got_end_of_section_symbol.s
Normal file
@@ -0,0 +1,28 @@
|
||||
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
|
||||
# RUN: %s -o %t.o
|
||||
# RUN: %clang %cflags -nostartfiles -nodefaultlibs -static -Wl,--no-relax \
|
||||
# RUN: -Wl,-q -Wl,-T %S/Inputs/got_end_of_section_symbol.lld_script \
|
||||
# RUN: %t.o -o %t.exe
|
||||
# RUN: not llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
|
||||
|
||||
# CHECK: BOLT-ERROR: GOT table contains currently unsupported section end
|
||||
# CHECK-SAME: symbol array_end
|
||||
|
||||
.section .array, "a", @progbits
|
||||
.globl array_start
|
||||
.globl array_end
|
||||
array_start:
|
||||
.word 0
|
||||
array_end:
|
||||
|
||||
.section .text
|
||||
.globl _start
|
||||
.type _start, %function
|
||||
_start:
|
||||
adrp x1, #:got:array_start
|
||||
ldr x1, [x1, #:got_lo12:array_start]
|
||||
adrp x0, #:got:array_end
|
||||
ldr x0, [x0, #:got_lo12:array_end]
|
||||
adrp x2, #:got:_start
|
||||
ldr x2, [x2, #:got_lo12:_start]
|
||||
ret
|
||||
@@ -1,7 +1,7 @@
|
||||
## Check that BOLT doesn't consider end-of-section symbols (e.g., _etext) as
|
||||
## functions.
|
||||
|
||||
# REQUIRES: x86_64-linux, asserts
|
||||
# REQUIRES: system-linux, asserts
|
||||
|
||||
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o
|
||||
# RUN: ld.lld %t.o -o %t.exe -q
|
||||
@@ -9,7 +9,7 @@
|
||||
# RUN: | FileCheck %s
|
||||
|
||||
# CHECK: considering symbol etext for function
|
||||
# CHECK-NEXT: rejecting as symbol points to end of its section
|
||||
# CHECK-NEXT: rejecting as symbol etext points to end of .text section
|
||||
# CHECK-NOT: Binary Function "etext{{.*}}" after building cfg
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user