Change `CountersPtr` in `__profd_` to a label difference, which is a link-time
constant. On ELF, when linking a shared object, this requires that `__profc_` is
either private or linkonce/linkonce_odr hidden. On COFF, we need D104564 so that
`.quad a-b` (64-bit label difference) can lower to a 32-bit PC-relative relocation.
```
# ELF: R_X86_64_PC64 (PC-relative)
.quad .L__profc_foo-.L__profd_foo
# Mach-O: a pair of 8-byte X86_64_RELOC_UNSIGNED and X86_64_RELOC_SUBTRACTOR
.quad l___profc_foo-l___profd_foo
# COFF: we actually use IMAGE_REL_AMD64_REL32/IMAGE_REL_ARM64_REL32 so
# the high 32-bit value is zero even if .L__profc_foo < .L__profd_foo
# As compensation, we truncate CountersDelta in the header so that
# __llvm_profile_merge_from_buffer and llvm-profdata reader keep working.
.quad .L__profc_foo-.L__profd_foo
```
(Note: link.exe sorts `.lprfc` before `.lprfd` even if the object writer
has `.lprfd` before `.lprfc`, so we cannot work around by reordering
`.lprfc` and `.lprfd`.)
With this change, a stage 2 (`-DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_BUILD_INSTRUMENTED=IR`)
`ld -pie` linked clang is 1.74% smaller due to fewer R_X86_64_RELATIVE relocations.
```
% readelf -r pie | awk '$3~/R.*/{s[$3]++} END {for (k in s) print k, s[k]}'
R_X86_64_JUMP_SLO 331
R_X86_64_TPOFF64 2
R_X86_64_RELATIVE 476059 # was: 607712
R_X86_64_64 2616
R_X86_64_GLOB_DAT 31
```
The absolute function address (used by llvm-profdata to collect indirect call
targets) can be converted to relative as well, but is not done in this patch.
Differential Revision: https://reviews.llvm.org/D104556
27 lines
1.3 KiB
LLVM
27 lines
1.3 KiB
LLVM
; RUN: opt < %s -pgo-instr-gen -instrprof -S | FileCheck %s
|
|
; RUN: opt < %s -passes=pgo-instr-gen,instrprof -S | FileCheck %s
|
|
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
$foo = comdat any
|
|
; CHECK: $foo = comdat any
|
|
|
|
; CHECK: $__llvm_profile_raw_version = comdat any
|
|
; CHECK: $__profc__stdin__foo.[[#FOO_HASH:]] = comdat any
|
|
|
|
@bar = global i32 ()* @foo, align 8
|
|
|
|
; CHECK: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat
|
|
; CHECK-NOT: __profn__stdin__foo
|
|
; CHECK: @__profc__stdin__foo.[[#FOO_HASH]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8
|
|
; CHECK: @__profd__stdin__foo.[[#FOO_HASH]] = private global { i64, i64, i64, i8*, i8*, i32, [2 x i16] } { i64 -5640069336071256030, i64 [[#FOO_HASH]], i64 sub (i64 ptrtoint ([1 x i64]* @__profc__stdin__foo.742261418966908927 to i64), i64 ptrtoint ({ i64, i64, i64, i8*, i8*, i32, [2 x i16] }* @__profd__stdin__foo.742261418966908927 to i64)), i8* null
|
|
; CHECK-NOT: bitcast (i32 ()* @foo to i8*)
|
|
; CHECK-SAME: , i8* null, i32 1, [2 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profc__stdin__foo.[[#FOO_HASH]]), align 8
|
|
; CHECK: @__llvm_prf_nm
|
|
; CHECK: @llvm.compiler.used
|
|
|
|
define internal i32 @foo() comdat {
|
|
entry:
|
|
ret i32 1
|
|
}
|