From 79cc728b77018bb1d87f0c327f3013aac85ba9fa Mon Sep 17 00:00:00 2001 From: SharonXSharon Date: Tue, 3 Jun 2025 10:12:36 -0700 Subject: [PATCH] [lld][macho] Strip .__uniq. and .llvm. hashes in -order_file (#140670) ``` /// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and /// "yyyy" are numbers that could change between builds. We need to use the root /// symbol name before this suffix so these symbols can be matched with profiles /// which may have different suffixes. ``` Just like what we are doing in BP, https://github.com/llvm/llvm-project/blob/main/lld/MachO/BPSectionOrderer.cpp#L127 the patch removes the suffixes when parsing the order file and getting the symbol priority to have a better symbol match. --------- Co-authored-by: Sharon Xu Co-authored-by: Ellis Hoag --- lld/Common/CMakeLists.txt | 1 + lld/Common/Utils.cpp | 22 +++++ lld/ELF/BPSectionOrderer.cpp | 3 +- lld/MachO/BPSectionOrderer.cpp | 2 +- lld/MachO/SectionPriorities.cpp | 5 +- .../lld/Common/BPSectionOrdererBase.inc | 20 +--- lld/include/lld/Common/Utils.h | 30 ++++++ lld/test/MachO/order-file-strip-hashes.s | 94 +++++++++++++++++++ 8 files changed, 157 insertions(+), 20 deletions(-) create mode 100644 lld/Common/Utils.cpp create mode 100644 lld/include/lld/Common/Utils.h create mode 100644 lld/test/MachO/order-file-strip-hashes.s diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt index 4f503d04f784..a9e8d72fb5ec 100644 --- a/lld/Common/CMakeLists.txt +++ b/lld/Common/CMakeLists.txt @@ -34,6 +34,7 @@ add_lld_library(lldCommon Strings.cpp TargetOptionsCommandFlags.cpp Timer.cpp + Utils.cpp VCSVersion.inc Version.cpp diff --git a/lld/Common/Utils.cpp b/lld/Common/Utils.cpp new file mode 100644 index 000000000000..56f6b5d90924 --- /dev/null +++ b/lld/Common/Utils.cpp @@ -0,0 +1,22 @@ +//===- Utils.cpp ------------------------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//===----------------------------------------------------------------------===// +// +// The file defines utils functions that can be shared across archs. +// +//===----------------------------------------------------------------------===// + +#include "lld/Common/Utils.h" + +using namespace llvm; +using namespace lld; + +StringRef lld::utils::getRootSymbol(StringRef name) { + name.consume_back(".Tgm"); + auto [P0, S0] = name.rsplit(".llvm."); + auto [P1, S1] = P0.rsplit(".__uniq."); + return P1; +} diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp index 793176c7725a..f464b1d4518a 100644 --- a/lld/ELF/BPSectionOrderer.cpp +++ b/lld/ELF/BPSectionOrderer.cpp @@ -81,7 +81,8 @@ DenseMap elf::runBalancedPartitioning( if (!sec || sec->size == 0 || !sec->isLive() || sec->repl != sec || !orderer.secToSym.try_emplace(sec, d).second) return; - rootSymbolToSectionIdxs[CachedHashStringRef(getRootSymbol(sym.getName()))] + rootSymbolToSectionIdxs[CachedHashStringRef( + lld::utils::getRootSymbol(sym.getName()))] .insert(sections.size()); sections.emplace_back(sec); }; diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp index 6c13a5557e10..1f07ea9caf5d 100644 --- a/lld/MachO/BPSectionOrderer.cpp +++ b/lld/MachO/BPSectionOrderer.cpp @@ -124,7 +124,7 @@ DenseMap lld::macho::runBalancedPartitioning( size_t idx = sections.size(); sections.emplace_back(isec); for (auto *sym : BPOrdererMachO::getSymbols(*isec)) { - auto rootName = getRootSymbol(sym->getName()); + auto rootName = lld::utils::getRootSymbol(sym->getName()); rootSymbolToSectionIdxs[CachedHashStringRef(rootName)].insert(idx); if (auto linkageName = BPOrdererMachO::getResolvedLinkageName(rootName)) diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp index 7a4a5d8465f6..5faedd9b790a 100644 --- a/lld/MachO/SectionPriorities.cpp +++ b/lld/MachO/SectionPriorities.cpp @@ -21,6 +21,7 @@ #include "lld/Common/Args.h" #include "lld/Common/CommonLinkerContext.h" #include "lld/Common/ErrorHandler.h" +#include "lld/Common/Utils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/Support/Path.h" @@ -250,7 +251,7 @@ macho::PriorityBuilder::getSymbolPriority(const Defined *sym) { if (sym->isAbsolute()) return std::nullopt; - auto it = priorities.find(sym->getName()); + auto it = priorities.find(utils::getRootSymbol(sym->getName())); if (it == priorities.end()) return std::nullopt; const SymbolPriorityEntry &entry = it->second; @@ -330,7 +331,7 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) { break; } } - symbol = line.trim(); + symbol = utils::getRootSymbol(line.trim()); if (!symbol.empty()) { SymbolPriorityEntry &entry = priorities[symbol]; diff --git a/lld/include/lld/Common/BPSectionOrdererBase.inc b/lld/include/lld/Common/BPSectionOrdererBase.inc index 51dfb6471644..bb2e55af1eb3 100644 --- a/lld/include/lld/Common/BPSectionOrdererBase.inc +++ b/lld/include/lld/Common/BPSectionOrdererBase.inc @@ -20,6 +20,7 @@ //===----------------------------------------------------------------------===// #include "lld/Common/ErrorHandler.h" +#include "lld/Common/Utils.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" @@ -147,19 +148,6 @@ static SmallVector> getUnsForCompression( return sectionUns; } -/// Symbols can be appended with "(.__uniq.xxxx)?(.llvm.yyyy)?(.Tgm)?" where -/// "xxxx" and "yyyy" are numbers that could change between builds, and .Tgm is -/// the global merge functions suffix -/// (see GlobalMergeFunc::MergingInstanceSuffix). We need to use the root symbol -/// name before this suffix so these symbols can be matched with profiles which -/// may have different suffixes. -inline StringRef getRootSymbol(StringRef name) { - name.consume_back(".Tgm"); - auto [P0, S0] = name.rsplit(".llvm."); - auto [P1, S1] = P0.rsplit(".__uniq."); - return P1; -} - template auto BPOrderer::computeOrder( StringRef profilePath, bool forFunctionCompression, bool forDataCompression, @@ -197,7 +185,7 @@ auto BPOrderer::computeOrder( for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) { auto [_, parsedFuncName] = getParsedIRPGOName( reader->getSymtab().getFuncOrVarName(trace[timestamp])); - parsedFuncName = getRootSymbol(parsedFuncName); + parsedFuncName = lld::utils::getRootSymbol(parsedFuncName); auto sectionIdxsIt = rootSymbolToSectionIdxs.find(CachedHashStringRef(parsedFuncName)); @@ -375,7 +363,7 @@ auto BPOrderer::computeOrder( // 4? uint64_t lastPage = endAddress / pageSize; StringRef rootSymbol = D::getSymName(*sym); - rootSymbol = getRootSymbol(rootSymbol); + rootSymbol = lld::utils::getRootSymbol(rootSymbol); symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage); if (auto resolvedLinkageName = D::getResolvedLinkageName(rootSymbol)) symbolToPageNumbers.try_emplace(resolvedLinkageName.value(), @@ -393,7 +381,7 @@ auto BPOrderer::computeOrder( auto traceId = trace.FunctionNameRefs[step]; auto [Filename, ParsedFuncName] = getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId)); - ParsedFuncName = getRootSymbol(ParsedFuncName); + ParsedFuncName = lld::utils::getRootSymbol(ParsedFuncName); auto it = symbolToPageNumbers.find(ParsedFuncName); if (it != symbolToPageNumbers.end()) { auto &[firstPage, lastPage] = it->getValue(); diff --git a/lld/include/lld/Common/Utils.h b/lld/include/lld/Common/Utils.h new file mode 100644 index 000000000000..86bcfa13d00f --- /dev/null +++ b/lld/include/lld/Common/Utils.h @@ -0,0 +1,30 @@ +//===- Utils.h ------------------------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//===----------------------------------------------------------------------===// +// +// The file declares utils functions that can be shared across archs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_UTILS_H +#define LLD_UTILS_H + +#include "llvm/ADT/StringRef.h" + +namespace lld { +namespace utils { + +/// Symbols can be appended with "(.__uniq.xxxx)?(.llvm.yyyy)?(.Tgm)?" where +/// "xxxx" and "yyyy" are numbers that could change between builds, and .Tgm is +/// the global merge functions suffix +/// (see GlobalMergeFunc::MergingInstanceSuffix). We need to use the root symbol +/// name before this suffix so these symbols can be matched with profiles which +/// may have different suffixes. +llvm::StringRef getRootSymbol(llvm::StringRef Name); +} // namespace utils +} // namespace lld + +#endif diff --git a/lld/test/MachO/order-file-strip-hashes.s b/lld/test/MachO/order-file-strip-hashes.s new file mode 100644 index 000000000000..ee0f876bb67e --- /dev/null +++ b/lld/test/MachO/order-file-strip-hashes.s @@ -0,0 +1,94 @@ +# RUN: rm -rf %t && split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o + +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o -order_file %t/ord-1 +# RUN: llvm-nm --numeric-sort --format=just-symbols %t/a.out | FileCheck %s + +#--- a.s +.text +.globl _main, A, _B, C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222 + +_main: + ret +A: + ret +F: + add w0, w0, #3 + bl C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222 + ret +C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222: + add w0, w0, #2 + bl A + ret +D: + add w0, w0, #2 + bl B + ret +B: + add w0, w0, #1 + bl A + ret +E: + add w0, w0, #2 + bl C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222 + ret + +.section __DATA,__objc_const +# test multiple symbols at the same address, which will be alphabetic sorted based symbol names +_OBJC_$_CATEGORY_CLASS_METHODS_Foo_$_Cat2: + .quad 789 + +_OBJC_$_CATEGORY_SOME_$_FOLDED: +_OBJC_$_CATEGORY_Foo_$_Cat1: +_ALPHABETIC_SORT_FIRST: + .quad 123 + +_OBJC_$_CATEGORY_Foo_$_Cat2: + .quad 222 + +_OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1: + .quad 456 + +.section __DATA,__objc_data +_OBJC_CLASS_$_Foo: + .quad 123 + +_OBJC_CLASS_$_Bar.llvm.1234: + .quad 456 + +_OBJC_CLASS_$_Baz: + .quad 789 + +_OBJC_CLASS_$_Baz2: + .quad 999 + +.section __DATA,__objc_classrefs +.quad _OBJC_CLASS_$_Foo +.quad _OBJC_CLASS_$_Bar.llvm.1234 +.quad _OBJC_CLASS_$_Baz + +.subsections_via_symbols + + +#--- ord-1 +# change order, parital covered +A +B +C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666 +_OBJC_CLASS_$_Baz +_OBJC_CLASS_$_Bar.__uniq.12345 +_OBJC_CLASS_$_Foo.__uniq.123.llvm.123456789 +_OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1 +_OBJC_$_CATEGORY_Foo_$_Cat1.llvm.1234567 + +# .text +# CHECK: A +# CHECK: B +# CHECK: C +# .section __DATA,__objc_const +# CHECK: _OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1 +# CHECK: _OBJC_$_CATEGORY_Foo_$_Cat1 +# .section __DATA,__objc_data +# CHECK: _OBJC_CLASS_$_Baz +# CHECK: _OBJC_CLASS_$_Bar +# CHECK: _OBJC_CLASS_$_Foo