Files
clang-p2996/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
Joseph Huber a1da746157 [AMDGPU] Place global constructors in .init_array and .fini_array
For the GPU, we emit external kernels that call the initializers and
constructors, however if we had a persistent kernel like in the `_start`
kernel for the `libc` project, we could initialize the standard way of
calling constructors. This patch adds new global variables containing
pointers to the constructors to be called. If these are placed in the
`.init_array` and `.fini_array` sections, then the backend will handle
them specially. The linker will then provide the `__init_array_` and
`__fini_array_` sections to traverse them. An implementation would look
like this.

```
extern uintptr_t __init_array_start[];
extern uintptr_t __init_array_end[];
extern uintptr_t __fini_array_start[];
extern uintptr_t __fini_array_end[];

using InitCallback = void(int, char **, char **);
using FiniCallback = void(void);

extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void
_start(int argc, char **argv, char **envp) {
  uint64_t init_array_size = __init_array_end - __init_array_start;
  for (uint64_t i = 0; i < init_array_size; ++i)
    reinterpret_cast<InitCallback *>(__init_array_start[i])(argc, argv, env);
  uint64_t fini_array_size = __fini_array_end - __fini_array_start;
  for (uint64_t i = 0; i < fini_array_size; ++i)
    reinterpret_cast<FiniCallback *>(__fini_array_start[i])();
}
```

Reviewed By: yaxunl

Differential Revision: https://reviews.llvm.org/D149340
2023-04-29 08:40:19 -05:00

81 lines
2.8 KiB
LLVM

; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-lower-ctor-dtor %s | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; Make sure we emit code for constructor entries that aren't direct
; function calls.
; Check a constructor that's an alias, and an integer literal.
@llvm.global_ctors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [
{ i32, ptr, ptr } { i32 1, ptr @foo.alias, i8* null },
{ i32, ptr, ptr } { i32 1, ptr inttoptr (i64 4096 to ptr), i8* null }
]
; Check a constantexpr addrspacecast
@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [
{ i32, ptr, ptr } { i32 1, ptr addrspacecast (ptr addrspace(1) @bar to ptr), i8* null }
]
@foo.alias = hidden alias void (), ptr @foo
;.
; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata"
; CHECK: @foo.alias = hidden alias void (), ptr @foo
;.
define void @foo() {
; CHECK-LABEL: @foo(
; CHECK-NEXT: ret void
;
ret void
}
define void @bar() addrspace(1) {
; CHECK-LABEL: @bar(
; CHECK-NEXT: ret void
;
ret void
}
; CHECK: define amdgpu_kernel void @amdgcn.device.init() #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: call void @foo.alias()
; CHECK-NEXT: call void inttoptr (i64 4096 to ptr)()
; CHECK-NEXT: ret void
; CHECK-NEXT: }
; CHECK: define amdgpu_kernel void @amdgcn.device.fini() #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: call void addrspacecast (ptr addrspace(1) @bar to ptr)()
; CHECK-NEXT: ret void
; CHECK-NEXT: }
;.
; CHECK: attributes #[[ATTR0]] = { "device-init" }
; CHECK: attributes #[[ATTR1]] = { "device-fini" }
; GCN-LABEL: foo:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GCN-LABEL: bar:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GCN-LABEL: amdgcn.device.init:
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], foo.alias@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], foo.alias@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN: s_mov_b64 [[LIT_ADDR:s\[[0-9]+:[0-9]+\]]], 0x1000
; GCN: s_swappc_b64 s[30:31], [[LIT_ADDR]]
; GCN-NEXT: s_endpgm
;
; GCN-LABEL: amdgcn.device.fini:
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], bar@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], bar@gotpcrel32@hi+12
; GCN-NEXT: s_load_dwordx2 s{{\[}}[[GOT_LO:[0-9]+]]:[[GOT_HI:[0-9]+]]{{\]}}, s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}, 0x0
; GCN: s_swappc_b64 s[30:31], s{{\[}}[[GOT_LO]]:[[GOT_HI]]{{\]}}
; GCN-NEXT: s_endpgm