Files
clang-p2996/llvm/test/CodeGen/AMDGPU/internalize.ll
Stanislav Mekhanoshin a3b72798af [AMDGPU] Internalize non-kernel symbols
Since we have no call support and late linking we can produce code
only for used symbols. This saves compilation time, size of the final
executable, and size of any intermediate dumps.

Run the Internalize pass early in the opt pipeline, followed by the global
DCE pass. To enable it, the runtime (RT) can pass the -amdgpu-internalize-symbols option.

Differential Revision: https://reviews.llvm.org/D29214

llvm-svn: 293549
2017-01-30 21:05:18 +00:00

36 lines
953 B
LLVM

; Verify that -amdgpu-internalize-symbols lets opt strip the symbols that the
; kernel does not need, while the kernel and the global it uses survive.
;
; The negative patterns are passed through -implicit-check-not so that they
; apply to the whole output. Written as negative directives ahead of the first
; positive match, they would only cover the text that precedes @gvar_used, so
; the function definitions printed later would never be examined.
; RUN: opt -O1 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck -implicit-check-not=unused -implicit-check-not=foo_used %s
; CHECK: gvar_used
; CHECK: main_kernel

; Not referenced by any function in this file; the test expects it to be dropped.
@gvar_unused = addrspace(1) global i32 undef, align 4
; Passed as the store destination by @main_kernel; the test expects it to be kept.
@gvar_used = addrspace(1) global i32 undef, align 4
; Helper that nothing in this file calls. It stores the constant 1 through its
; pointer argument. Carries attribute group #1 (alwaysinline nounwind).
; The test expects no symbol containing "unused" to remain in the output.
define void @foo_unused(i32 addrspace(1)* %dst) local_unnamed_addr #1 {
body:
  store i32 1, i32 addrspace(1)* %dst
  ret void
}
; Helper called from @main_kernel. It stores its i32 argument through its
; pointer argument. Carries attribute group #1 (alwaysinline nounwind); the
; test expects the symbol name itself to be absent from the optimized output.
define void @foo_used(i32 addrspace(1)* %dst, i32 %val) local_unnamed_addr #1 {
body:
  store i32 %val, i32 addrspace(1)* %dst
  ret void
}
; Kernel entry point (amdgpu_kernel calling convention). It takes the value
; returned by llvm.amdgcn.workitem.id.x and passes it, together with the
; address of @gvar_used, to @foo_used.
define amdgpu_kernel void @main_kernel() {
start:
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  tail call void @foo_used(i32 addrspace(1)* @gvar_used, i32 %wi.x) nounwind
  ret void
}
; Intrinsic called by @main_kernel; declared only, no body in this file.
declare i32 @llvm.amdgcn.workitem.id.x() #0
; Attribute group #0 is attached to the intrinsic declaration above.
attributes #0 = { nounwind readnone }
; Attribute group #1 is attached to @foo_unused and @foo_used.
attributes #1 = { alwaysinline nounwind }