[lld][macho] Strip .__uniq. and .llvm. hashes in -order_file (#140670)
``` /// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and /// "yyyy" are numbers that could change between builds. We need to use the root /// symbol name before this suffix so these symbols can be matched with profiles /// which may have different suffixes. ``` Just like what we already do in BP (https://github.com/llvm/llvm-project/blob/main/lld/MachO/BPSectionOrderer.cpp#L127), this patch removes the suffixes both when parsing the order file and when looking up a symbol's priority, so that symbols match more reliably. --------- Co-authored-by: Sharon Xu <sharonxu@fb.com> Co-authored-by: Ellis Hoag <ellis.sparky.hoag@gmail.com>
This commit is contained in:
@@ -34,6 +34,7 @@ add_lld_library(lldCommon
|
|||||||
Strings.cpp
|
Strings.cpp
|
||||||
TargetOptionsCommandFlags.cpp
|
TargetOptionsCommandFlags.cpp
|
||||||
Timer.cpp
|
Timer.cpp
|
||||||
|
Utils.cpp
|
||||||
VCSVersion.inc
|
VCSVersion.inc
|
||||||
Version.cpp
|
Version.cpp
|
||||||
|
|
||||||
|
|||||||
22
lld/Common/Utils.cpp
Normal file
22
lld/Common/Utils.cpp
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
//===- Utils.cpp ------------------------------------------------*- C++-*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file defines utility functions that can be shared across archs.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "lld/Common/Utils.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
using namespace lld;
|
||||||
|
|
||||||
|
// Returns the root of `name`: the symbol name with an optional trailing ".Tgm"
// removed, then cut before the last ".llvm." and, within what remains, before
// the last ".__uniq.". A name containing none of these markers is returned
// unchanged. The result is a sub-range of `name`; nothing is copied.
StringRef lld::utils::getRootSymbol(StringRef name) {
|
||||||
|
name.consume_back(".Tgm"); // Drop a trailing ".Tgm" if present; otherwise a no-op.
|
||||||
|
auto [P0, S0] = name.rsplit(".llvm."); // P0 = text before the last ".llvm." (whole name if absent); S0 is unused.
|
||||||
|
auto [P1, S1] = P0.rsplit(".__uniq."); // P1 = text before the last ".__uniq." in P0 (all of P0 if absent); S1 is unused.
|
||||||
|
return P1;
|
||||||
|
}
|
||||||
@@ -81,7 +81,8 @@ DenseMap<const InputSectionBase *, int> elf::runBalancedPartitioning(
|
|||||||
if (!sec || sec->size == 0 || !sec->isLive() || sec->repl != sec ||
|
if (!sec || sec->size == 0 || !sec->isLive() || sec->repl != sec ||
|
||||||
!orderer.secToSym.try_emplace(sec, d).second)
|
!orderer.secToSym.try_emplace(sec, d).second)
|
||||||
return;
|
return;
|
||||||
rootSymbolToSectionIdxs[CachedHashStringRef(getRootSymbol(sym.getName()))]
|
rootSymbolToSectionIdxs[CachedHashStringRef(
|
||||||
|
lld::utils::getRootSymbol(sym.getName()))]
|
||||||
.insert(sections.size());
|
.insert(sections.size());
|
||||||
sections.emplace_back(sec);
|
sections.emplace_back(sec);
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -124,7 +124,7 @@ DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
|
|||||||
size_t idx = sections.size();
|
size_t idx = sections.size();
|
||||||
sections.emplace_back(isec);
|
sections.emplace_back(isec);
|
||||||
for (auto *sym : BPOrdererMachO::getSymbols(*isec)) {
|
for (auto *sym : BPOrdererMachO::getSymbols(*isec)) {
|
||||||
auto rootName = getRootSymbol(sym->getName());
|
auto rootName = lld::utils::getRootSymbol(sym->getName());
|
||||||
rootSymbolToSectionIdxs[CachedHashStringRef(rootName)].insert(idx);
|
rootSymbolToSectionIdxs[CachedHashStringRef(rootName)].insert(idx);
|
||||||
if (auto linkageName =
|
if (auto linkageName =
|
||||||
BPOrdererMachO::getResolvedLinkageName(rootName))
|
BPOrdererMachO::getResolvedLinkageName(rootName))
|
||||||
|
|||||||
@@ -21,6 +21,7 @@
|
|||||||
#include "lld/Common/Args.h"
|
#include "lld/Common/Args.h"
|
||||||
#include "lld/Common/CommonLinkerContext.h"
|
#include "lld/Common/CommonLinkerContext.h"
|
||||||
#include "lld/Common/ErrorHandler.h"
|
#include "lld/Common/ErrorHandler.h"
|
||||||
|
#include "lld/Common/Utils.h"
|
||||||
#include "llvm/ADT/DenseMap.h"
|
#include "llvm/ADT/DenseMap.h"
|
||||||
#include "llvm/ADT/MapVector.h"
|
#include "llvm/ADT/MapVector.h"
|
||||||
#include "llvm/Support/Path.h"
|
#include "llvm/Support/Path.h"
|
||||||
@@ -250,7 +251,7 @@ macho::PriorityBuilder::getSymbolPriority(const Defined *sym) {
|
|||||||
if (sym->isAbsolute())
|
if (sym->isAbsolute())
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
|
|
||||||
auto it = priorities.find(sym->getName());
|
auto it = priorities.find(utils::getRootSymbol(sym->getName()));
|
||||||
if (it == priorities.end())
|
if (it == priorities.end())
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
const SymbolPriorityEntry &entry = it->second;
|
const SymbolPriorityEntry &entry = it->second;
|
||||||
@@ -330,7 +331,7 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
symbol = line.trim();
|
symbol = utils::getRootSymbol(line.trim());
|
||||||
|
|
||||||
if (!symbol.empty()) {
|
if (!symbol.empty()) {
|
||||||
SymbolPriorityEntry &entry = priorities[symbol];
|
SymbolPriorityEntry &entry = priorities[symbol];
|
||||||
|
|||||||
@@ -20,6 +20,7 @@
|
|||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#include "lld/Common/ErrorHandler.h"
|
#include "lld/Common/ErrorHandler.h"
|
||||||
|
#include "lld/Common/Utils.h"
|
||||||
#include "llvm/ADT/CachedHashString.h"
|
#include "llvm/ADT/CachedHashString.h"
|
||||||
#include "llvm/ADT/DenseMap.h"
|
#include "llvm/ADT/DenseMap.h"
|
||||||
#include "llvm/ADT/MapVector.h"
|
#include "llvm/ADT/MapVector.h"
|
||||||
@@ -147,19 +148,6 @@ static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
|
|||||||
return sectionUns;
|
return sectionUns;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Symbols can be appended with "(.__uniq.xxxx)?(.llvm.yyyy)?(.Tgm)?" where
|
|
||||||
/// "xxxx" and "yyyy" are numbers that could change between builds, and .Tgm is
|
|
||||||
/// the global merge functions suffix
|
|
||||||
/// (see GlobalMergeFunc::MergingInstanceSuffix). We need to use the root symbol
|
|
||||||
/// name before this suffix so these symbols can be matched with profiles which
|
|
||||||
/// may have different suffixes.
|
|
||||||
inline StringRef getRootSymbol(StringRef name) {
|
|
||||||
name.consume_back(".Tgm"); // Drop a trailing ".Tgm" if present; otherwise a no-op.
|
|
||||||
auto [P0, S0] = name.rsplit(".llvm."); // P0 = text before the last ".llvm." (whole name if absent); S0 is unused.
|
|
||||||
auto [P1, S1] = P0.rsplit(".__uniq."); // P1 = text before the last ".__uniq." in P0 (all of P0 if absent); S1 is unused.
|
|
||||||
return P1;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class D>
|
template <class D>
|
||||||
auto BPOrderer<D>::computeOrder(
|
auto BPOrderer<D>::computeOrder(
|
||||||
StringRef profilePath, bool forFunctionCompression, bool forDataCompression,
|
StringRef profilePath, bool forFunctionCompression, bool forDataCompression,
|
||||||
@@ -197,7 +185,7 @@ auto BPOrderer<D>::computeOrder(
|
|||||||
for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) {
|
for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) {
|
||||||
auto [_, parsedFuncName] = getParsedIRPGOName(
|
auto [_, parsedFuncName] = getParsedIRPGOName(
|
||||||
reader->getSymtab().getFuncOrVarName(trace[timestamp]));
|
reader->getSymtab().getFuncOrVarName(trace[timestamp]));
|
||||||
parsedFuncName = getRootSymbol(parsedFuncName);
|
parsedFuncName = lld::utils::getRootSymbol(parsedFuncName);
|
||||||
|
|
||||||
auto sectionIdxsIt =
|
auto sectionIdxsIt =
|
||||||
rootSymbolToSectionIdxs.find(CachedHashStringRef(parsedFuncName));
|
rootSymbolToSectionIdxs.find(CachedHashStringRef(parsedFuncName));
|
||||||
@@ -375,7 +363,7 @@ auto BPOrderer<D>::computeOrder(
|
|||||||
// 4?
|
// 4?
|
||||||
uint64_t lastPage = endAddress / pageSize;
|
uint64_t lastPage = endAddress / pageSize;
|
||||||
StringRef rootSymbol = D::getSymName(*sym);
|
StringRef rootSymbol = D::getSymName(*sym);
|
||||||
rootSymbol = getRootSymbol(rootSymbol);
|
rootSymbol = lld::utils::getRootSymbol(rootSymbol);
|
||||||
symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
|
symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
|
||||||
if (auto resolvedLinkageName = D::getResolvedLinkageName(rootSymbol))
|
if (auto resolvedLinkageName = D::getResolvedLinkageName(rootSymbol))
|
||||||
symbolToPageNumbers.try_emplace(resolvedLinkageName.value(),
|
symbolToPageNumbers.try_emplace(resolvedLinkageName.value(),
|
||||||
@@ -393,7 +381,7 @@ auto BPOrderer<D>::computeOrder(
|
|||||||
auto traceId = trace.FunctionNameRefs[step];
|
auto traceId = trace.FunctionNameRefs[step];
|
||||||
auto [Filename, ParsedFuncName] =
|
auto [Filename, ParsedFuncName] =
|
||||||
getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId));
|
getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId));
|
||||||
ParsedFuncName = getRootSymbol(ParsedFuncName);
|
ParsedFuncName = lld::utils::getRootSymbol(ParsedFuncName);
|
||||||
auto it = symbolToPageNumbers.find(ParsedFuncName);
|
auto it = symbolToPageNumbers.find(ParsedFuncName);
|
||||||
if (it != symbolToPageNumbers.end()) {
|
if (it != symbolToPageNumbers.end()) {
|
||||||
auto &[firstPage, lastPage] = it->getValue();
|
auto &[firstPage, lastPage] = it->getValue();
|
||||||
|
|||||||
30
lld/include/lld/Common/Utils.h
Normal file
30
lld/include/lld/Common/Utils.h
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
//===- Utils.h ------------------------------------------------*- C++-*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file declares utility functions that can be shared across archs.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef LLD_UTILS_H
|
||||||
|
#define LLD_UTILS_H
|
||||||
|
|
||||||
|
#include "llvm/ADT/StringRef.h"
|
||||||
|
|
||||||
|
namespace lld {
|
||||||
|
namespace utils {
|
||||||
|
|
||||||
|
/// Symbols can be appended with "(.__uniq.xxxx)?(.llvm.yyyy)?(.Tgm)?" where
|
||||||
|
/// "xxxx" and "yyyy" are numbers that could change between builds, and .Tgm is
|
||||||
|
/// the global merge functions suffix
|
||||||
|
/// (see GlobalMergeFunc::MergingInstanceSuffix). We need to use the root symbol
|
||||||
|
/// name before this suffix so these symbols can be matched with profiles which
|
||||||
|
/// may have different suffixes. Returns the root symbol name of \p Name.
|
||||||
|
llvm::StringRef getRootSymbol(llvm::StringRef Name);
|
||||||
|
} // namespace utils
|
||||||
|
} // namespace lld
|
||||||
|
|
||||||
|
#endif
|
||||||
94
lld/test/MachO/order-file-strip-hashes.s
Normal file
94
lld/test/MachO/order-file-strip-hashes.s
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
# RUN: rm -rf %t && split-file %s %t
|
||||||
|
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
|
||||||
|
|
||||||
|
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o -order_file %t/ord-1
|
||||||
|
# RUN: llvm-nm --numeric-sort --format=just-symbols %t/a.out | FileCheck %s
|
||||||
|
|
||||||
|
#--- a.s
|
||||||
|
.text
|
||||||
|
.globl _main, A, _B, C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
|
||||||
|
|
||||||
|
_main:
|
||||||
|
ret
|
||||||
|
A:
|
||||||
|
ret
|
||||||
|
F:
|
||||||
|
add w0, w0, #3
|
||||||
|
bl C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
|
||||||
|
ret
|
||||||
|
C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222:
|
||||||
|
add w0, w0, #2
|
||||||
|
bl A
|
||||||
|
ret
|
||||||
|
D:
|
||||||
|
add w0, w0, #2
|
||||||
|
bl B
|
||||||
|
ret
|
||||||
|
B:
|
||||||
|
add w0, w0, #1
|
||||||
|
bl A
|
||||||
|
ret
|
||||||
|
E:
|
||||||
|
add w0, w0, #2
|
||||||
|
bl C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
|
||||||
|
ret
|
||||||
|
|
||||||
|
.section __DATA,__objc_const
|
||||||
|
# Test multiple symbols at the same address, which will be sorted alphabetically by symbol name.
|
||||||
|
_OBJC_$_CATEGORY_CLASS_METHODS_Foo_$_Cat2:
|
||||||
|
.quad 789
|
||||||
|
|
||||||
|
_OBJC_$_CATEGORY_SOME_$_FOLDED:
|
||||||
|
_OBJC_$_CATEGORY_Foo_$_Cat1:
|
||||||
|
_ALPHABETIC_SORT_FIRST:
|
||||||
|
.quad 123
|
||||||
|
|
||||||
|
_OBJC_$_CATEGORY_Foo_$_Cat2:
|
||||||
|
.quad 222
|
||||||
|
|
||||||
|
_OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1:
|
||||||
|
.quad 456
|
||||||
|
|
||||||
|
.section __DATA,__objc_data
|
||||||
|
_OBJC_CLASS_$_Foo:
|
||||||
|
.quad 123
|
||||||
|
|
||||||
|
_OBJC_CLASS_$_Bar.llvm.1234:
|
||||||
|
.quad 456
|
||||||
|
|
||||||
|
_OBJC_CLASS_$_Baz:
|
||||||
|
.quad 789
|
||||||
|
|
||||||
|
_OBJC_CLASS_$_Baz2:
|
||||||
|
.quad 999
|
||||||
|
|
||||||
|
.section __DATA,__objc_classrefs
|
||||||
|
.quad _OBJC_CLASS_$_Foo
|
||||||
|
.quad _OBJC_CLASS_$_Bar.llvm.1234
|
||||||
|
.quad _OBJC_CLASS_$_Baz
|
||||||
|
|
||||||
|
.subsections_via_symbols
|
||||||
|
|
||||||
|
|
||||||
|
#--- ord-1
|
||||||
|
# Change order, partially covered
|
||||||
|
A
|
||||||
|
B
|
||||||
|
C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666
|
||||||
|
_OBJC_CLASS_$_Baz
|
||||||
|
_OBJC_CLASS_$_Bar.__uniq.12345
|
||||||
|
_OBJC_CLASS_$_Foo.__uniq.123.llvm.123456789
|
||||||
|
_OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1
|
||||||
|
_OBJC_$_CATEGORY_Foo_$_Cat1.llvm.1234567
|
||||||
|
|
||||||
|
# .text
|
||||||
|
# CHECK: A
|
||||||
|
# CHECK: B
|
||||||
|
# CHECK: C
|
||||||
|
# .section __DATA,__objc_const
|
||||||
|
# CHECK: _OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1
|
||||||
|
# CHECK: _OBJC_$_CATEGORY_Foo_$_Cat1
|
||||||
|
# .section __DATA,__objc_data
|
||||||
|
# CHECK: _OBJC_CLASS_$_Baz
|
||||||
|
# CHECK: _OBJC_CLASS_$_Bar
|
||||||
|
# CHECK: _OBJC_CLASS_$_Foo
|
||||||
Reference in New Issue
Block a user