Summary:
The AMDGPU backend uses the linker-provided INIT_ARRAY and FINI_ARRAY
sections to call all the global constructors in a single kernel.
Previously this mistakenly used the same iteration logic for both
arrays. The destructors stored in FINI_ARRAY are stored in the same
order as
the ones in the INIT_ARRAY section so we need to traverse it in reverse
order.
Relanding after the revert in fe7b5e2cfc
using the IR builder interface instead of ConstantExpr.
86 lines
4.5 KiB
LLVM
86 lines
4.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs --version 3
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-lower-ctor-dtor %s | FileCheck %s
|
|
|
|
; Make sure we emit code for constructor entries that aren't direct
|
|
; function calls.
|
|
|
|
; Check a constructor that's an alias, and an integer literal.
|
|
@llvm.global_ctors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [
|
|
{ i32, ptr, ptr } { i32 1, ptr @foo.alias, i8* null },
|
|
{ i32, ptr, ptr } { i32 1, ptr inttoptr (i64 4096 to ptr), i8* null }
|
|
]
|
|
|
|
; Check a constantexpr addrspacecast
|
|
@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [
|
|
{ i32, ptr, ptr } { i32 1, ptr addrspacecast (ptr addrspace(1) @bar to ptr), i8* null }
|
|
]
|
|
|
|
@foo.alias = hidden alias void (), ptr @foo
|
|
|
|
define void @foo() {
|
|
ret void
|
|
}
|
|
|
|
define void @bar() addrspace(1) {
|
|
ret void
|
|
}
|
|
|
|
;.
|
|
; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo.alias, ptr null }, { i32, ptr, ptr } { i32 1, ptr inttoptr (i64 4096 to ptr), ptr null }]
|
|
; CHECK: @[[LLVM_GLOBAL_DTORS:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr addrspacecast (ptr addrspace(1) @bar to ptr), ptr null }]
|
|
; CHECK: @[[__INIT_ARRAY_START:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)]
|
|
; CHECK: @[[__INIT_ARRAY_END:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)]
|
|
; CHECK: @[[__FINI_ARRAY_START:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)]
|
|
; CHECK: @[[__FINI_ARRAY_END:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)]
|
|
; CHECK: @[[LLVM_USED:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata"
|
|
; CHECK: @[[FOO_ALIAS:[a-zA-Z0-9_$"\\.-]+]] = hidden alias void (), ptr @foo
|
|
;.
|
|
; CHECK-LABEL: define void @foo(
|
|
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK-LABEL: define void @bar(
|
|
; CHECK-SAME: ) addrspace(1) #[[ATTR0]] {
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init(
|
|
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__init_array_start, ptr addrspace(1) @__init_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
|
|
; CHECK: while.entry:
|
|
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__init_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
|
|
; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
|
|
; CHECK-NEXT: call void [[CALLBACK]]()
|
|
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
|
|
; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__init_array_end
|
|
; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
|
|
; CHECK: while.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;
|
|
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
|
|
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3
|
|
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 [[TMP1]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = icmp uge ptr addrspace(1) [[TMP2]], @__fini_array_start
|
|
; CHECK-NEXT: br i1 [[TMP3]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
|
|
; CHECK: while.entry:
|
|
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[TMP2]], [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
|
|
; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
|
|
; CHECK-NEXT: call void [[CALLBACK]]()
|
|
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 -1
|
|
; CHECK-NEXT: [[END:%.*]] = icmp ult ptr addrspace(1) [[NEXT]], @__fini_array_start
|
|
; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
|
|
; CHECK: while.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;.
|
|
; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx900" }
|
|
; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,1" "device-init" }
|
|
; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="1,1" "device-fini" }
|
|
;.
|