[lld][macho] Strip .__uniq. and .llvm. hashes in -order_file (#140670)

```
/// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and
/// "yyyy" are numbers that could change between builds. We need to use the root
/// symbol name before this suffix so these symbols can be matched with profiles
/// which may have different suffixes.
```
This mirrors what the balanced-partitioning (BP) section orderer already does:
https://github.com/llvm/llvm-project/blob/main/lld/MachO/BPSectionOrderer.cpp#L127

This patch strips the same suffixes both when parsing the order file and when
looking up a symbol's priority, so that symbols still match their order-file
entries when the hashes differ.

---------

Co-authored-by: Sharon Xu <sharonxu@fb.com>
Co-authored-by: Ellis Hoag <ellis.sparky.hoag@gmail.com>
This commit is contained in:
SharonXSharon
2025-06-03 10:12:36 -07:00
committed by GitHub
parent 95ce58bc4a
commit 79cc728b77
8 changed files with 157 additions and 20 deletions

View File

@@ -34,6 +34,7 @@ add_lld_library(lldCommon
Strings.cpp Strings.cpp
TargetOptionsCommandFlags.cpp TargetOptionsCommandFlags.cpp
Timer.cpp Timer.cpp
Utils.cpp
VCSVersion.inc VCSVersion.inc
Version.cpp Version.cpp

22
lld/Common/Utils.cpp Normal file
View File

@@ -0,0 +1,22 @@
//===- Utils.cpp ------------------------------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
//
// This file defines utility functions that can be shared across architectures.
//
//===----------------------------------------------------------------------===//
#include "lld/Common/Utils.h"
using namespace llvm;
using namespace lld;
/// Returns the root of \p name: a trailing ".Tgm" is dropped first, then
/// everything from the last ".llvm." onward, then everything from the last
/// ".__uniq." onward. A name carrying none of these markers is returned as is.
StringRef lld::utils::getRootSymbol(StringRef name) {
  // consume_back only removes the suffix when it is actually present.
  name.consume_back(".Tgm");
  // rsplit yields the whole string as `first` when the separator is absent,
  // so each step is a no-op for names without that marker.
  StringRef withoutLlvmHash = name.rsplit(".llvm.").first;
  return withoutLlvmHash.rsplit(".__uniq.").first;
}

View File

@@ -81,7 +81,8 @@ DenseMap<const InputSectionBase *, int> elf::runBalancedPartitioning(
if (!sec || sec->size == 0 || !sec->isLive() || sec->repl != sec || if (!sec || sec->size == 0 || !sec->isLive() || sec->repl != sec ||
!orderer.secToSym.try_emplace(sec, d).second) !orderer.secToSym.try_emplace(sec, d).second)
return; return;
rootSymbolToSectionIdxs[CachedHashStringRef(getRootSymbol(sym.getName()))] rootSymbolToSectionIdxs[CachedHashStringRef(
lld::utils::getRootSymbol(sym.getName()))]
.insert(sections.size()); .insert(sections.size());
sections.emplace_back(sec); sections.emplace_back(sec);
}; };

View File

@@ -124,7 +124,7 @@ DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
size_t idx = sections.size(); size_t idx = sections.size();
sections.emplace_back(isec); sections.emplace_back(isec);
for (auto *sym : BPOrdererMachO::getSymbols(*isec)) { for (auto *sym : BPOrdererMachO::getSymbols(*isec)) {
auto rootName = getRootSymbol(sym->getName()); auto rootName = lld::utils::getRootSymbol(sym->getName());
rootSymbolToSectionIdxs[CachedHashStringRef(rootName)].insert(idx); rootSymbolToSectionIdxs[CachedHashStringRef(rootName)].insert(idx);
if (auto linkageName = if (auto linkageName =
BPOrdererMachO::getResolvedLinkageName(rootName)) BPOrdererMachO::getResolvedLinkageName(rootName))

View File

@@ -21,6 +21,7 @@
#include "lld/Common/Args.h" #include "lld/Common/Args.h"
#include "lld/Common/CommonLinkerContext.h" #include "lld/Common/CommonLinkerContext.h"
#include "lld/Common/ErrorHandler.h" #include "lld/Common/ErrorHandler.h"
#include "lld/Common/Utils.h"
#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h" #include "llvm/ADT/MapVector.h"
#include "llvm/Support/Path.h" #include "llvm/Support/Path.h"
@@ -250,7 +251,7 @@ macho::PriorityBuilder::getSymbolPriority(const Defined *sym) {
if (sym->isAbsolute()) if (sym->isAbsolute())
return std::nullopt; return std::nullopt;
auto it = priorities.find(sym->getName()); auto it = priorities.find(utils::getRootSymbol(sym->getName()));
if (it == priorities.end()) if (it == priorities.end())
return std::nullopt; return std::nullopt;
const SymbolPriorityEntry &entry = it->second; const SymbolPriorityEntry &entry = it->second;
@@ -330,7 +331,7 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) {
break; break;
} }
} }
symbol = line.trim(); symbol = utils::getRootSymbol(line.trim());
if (!symbol.empty()) { if (!symbol.empty()) {
SymbolPriorityEntry &entry = priorities[symbol]; SymbolPriorityEntry &entry = priorities[symbol];

View File

@@ -20,6 +20,7 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "lld/Common/ErrorHandler.h" #include "lld/Common/ErrorHandler.h"
#include "lld/Common/Utils.h"
#include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h" #include "llvm/ADT/MapVector.h"
@@ -147,19 +148,6 @@ static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
return sectionUns; return sectionUns;
} }
/// Strips build-dependent decorations from a symbol name. A symbol may carry
/// "(.__uniq.xxxx)?(.llvm.yyyy)?(.Tgm)?" after its root, where "xxxx" and "yyyy"
/// are numbers that can differ from build to build and ".Tgm" is the global
/// merge functions suffix (see GlobalMergeFunc::MergingInstanceSuffix).
/// Returning only the root lets these symbols be matched with profiles whose
/// suffixes may differ.
inline StringRef getRootSymbol(StringRef name) {
  name.consume_back(".Tgm");
  // rsplit returns the entire string as `first` when the separator is missing.
  const StringRef beforeLlvm = name.rsplit(".llvm.").first;
  const StringRef beforeUniq = beforeLlvm.rsplit(".__uniq.").first;
  return beforeUniq;
}
template <class D> template <class D>
auto BPOrderer<D>::computeOrder( auto BPOrderer<D>::computeOrder(
StringRef profilePath, bool forFunctionCompression, bool forDataCompression, StringRef profilePath, bool forFunctionCompression, bool forDataCompression,
@@ -197,7 +185,7 @@ auto BPOrderer<D>::computeOrder(
for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) { for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) {
auto [_, parsedFuncName] = getParsedIRPGOName( auto [_, parsedFuncName] = getParsedIRPGOName(
reader->getSymtab().getFuncOrVarName(trace[timestamp])); reader->getSymtab().getFuncOrVarName(trace[timestamp]));
parsedFuncName = getRootSymbol(parsedFuncName); parsedFuncName = lld::utils::getRootSymbol(parsedFuncName);
auto sectionIdxsIt = auto sectionIdxsIt =
rootSymbolToSectionIdxs.find(CachedHashStringRef(parsedFuncName)); rootSymbolToSectionIdxs.find(CachedHashStringRef(parsedFuncName));
@@ -375,7 +363,7 @@ auto BPOrderer<D>::computeOrder(
// 4? // 4?
uint64_t lastPage = endAddress / pageSize; uint64_t lastPage = endAddress / pageSize;
StringRef rootSymbol = D::getSymName(*sym); StringRef rootSymbol = D::getSymName(*sym);
rootSymbol = getRootSymbol(rootSymbol); rootSymbol = lld::utils::getRootSymbol(rootSymbol);
symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage); symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
if (auto resolvedLinkageName = D::getResolvedLinkageName(rootSymbol)) if (auto resolvedLinkageName = D::getResolvedLinkageName(rootSymbol))
symbolToPageNumbers.try_emplace(resolvedLinkageName.value(), symbolToPageNumbers.try_emplace(resolvedLinkageName.value(),
@@ -393,7 +381,7 @@ auto BPOrderer<D>::computeOrder(
auto traceId = trace.FunctionNameRefs[step]; auto traceId = trace.FunctionNameRefs[step];
auto [Filename, ParsedFuncName] = auto [Filename, ParsedFuncName] =
getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId)); getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId));
ParsedFuncName = getRootSymbol(ParsedFuncName); ParsedFuncName = lld::utils::getRootSymbol(ParsedFuncName);
auto it = symbolToPageNumbers.find(ParsedFuncName); auto it = symbolToPageNumbers.find(ParsedFuncName);
if (it != symbolToPageNumbers.end()) { if (it != symbolToPageNumbers.end()) {
auto &[firstPage, lastPage] = it->getValue(); auto &[firstPage, lastPage] = it->getValue();

View File

@@ -0,0 +1,30 @@
//===- Utils.h ------------------------------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
//
// This file declares utility functions that can be shared across architectures.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_UTILS_H
#define LLD_UTILS_H
#include "llvm/ADT/StringRef.h"
namespace lld {
namespace utils {
/// Returns the root symbol name of \p name.
///
/// Symbols can be appended with "(.__uniq.xxxx)?(.llvm.yyyy)?(.Tgm)?" where
/// "xxxx" and "yyyy" are numbers that could change between builds, and .Tgm is
/// the global merge functions suffix
/// (see GlobalMergeFunc::MergingInstanceSuffix). We need to use the root symbol
/// name before this suffix so these symbols can be matched with profiles which
/// may have different suffixes.
///
/// \param name the possibly-suffixed symbol name.
/// \returns \p name with those suffixes removed; a name without any of them
/// is returned unchanged. The result refers to the same underlying characters
/// as \p name, so it is only valid while that storage is.
llvm::StringRef getRootSymbol(llvm::StringRef name);
} // namespace utils
} // namespace lld
#endif

View File

@@ -0,0 +1,94 @@
# RUN: rm -rf %t && split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o -order_file %t/ord-1
# RUN: llvm-nm --numeric-sort --format=just-symbols %t/a.out | FileCheck %s
#--- a.s
.text
.globl _main, A, _B, C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
_main:
ret
A:
ret
F:
add w0, w0, #3
bl C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
ret
C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222:
add w0, w0, #2
bl A
ret
D:
add w0, w0, #2
bl B
ret
B:
add w0, w0, #1
bl A
ret
E:
add w0, w0, #2
bl C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
ret
.section __DATA,__objc_const
# Test multiple symbols at the same address, which are sorted alphabetically by symbol name.
_OBJC_$_CATEGORY_CLASS_METHODS_Foo_$_Cat2:
.quad 789
_OBJC_$_CATEGORY_SOME_$_FOLDED:
_OBJC_$_CATEGORY_Foo_$_Cat1:
_ALPHABETIC_SORT_FIRST:
.quad 123
_OBJC_$_CATEGORY_Foo_$_Cat2:
.quad 222
_OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1:
.quad 456
.section __DATA,__objc_data
_OBJC_CLASS_$_Foo:
.quad 123
_OBJC_CLASS_$_Bar.llvm.1234:
.quad 456
_OBJC_CLASS_$_Baz:
.quad 789
_OBJC_CLASS_$_Baz2:
.quad 999
.section __DATA,__objc_classrefs
.quad _OBJC_CLASS_$_Foo
.quad _OBJC_CLASS_$_Bar.llvm.1234
.quad _OBJC_CLASS_$_Baz
.subsections_via_symbols
#--- ord-1
# Change the order; only some symbols are covered.
A
B
C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666
_OBJC_CLASS_$_Baz
_OBJC_CLASS_$_Bar.__uniq.12345
_OBJC_CLASS_$_Foo.__uniq.123.llvm.123456789
_OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1
_OBJC_$_CATEGORY_Foo_$_Cat1.llvm.1234567
# .text
# CHECK: A
# CHECK: B
# CHECK: C
# .section __DATA,__objc_const
# CHECK: _OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1
# CHECK: _OBJC_$_CATEGORY_Foo_$_Cat1
# .section __DATA,__objc_data
# CHECK: _OBJC_CLASS_$_Baz
# CHECK: _OBJC_CLASS_$_Bar
# CHECK: _OBJC_CLASS_$_Foo