Files
clang-p2996/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll
Jon Chesterfield 74e928a081 [amdgpu][lds] Remove recalculation of LDS frame from backend
Do the LDS frame calculation once, in the IR pass, instead of repeating the work in the backend.

Prior to this patch:
The IR lowering pass sets up a per-kernel LDS frame and annotates the variables with absolute_symbol
metadata so that the assembler can build lookup tables out of it. There is a fragile association between
kernel functions and named structs which is used to recompute the frame layout in the backend, with
fatal_errors catching inconsistencies in the second calculation.

After this patch:
The IR lowering pass additionally sets a frame size attribute on kernels. The backend uses the same
absolute_symbol metadata that the assembler uses to place objects within that frame size.

Deleted the now dead allocation code from the backend. Left for a later cleanup:
- enabling lowering for anonymous functions
- removing the elide-module-lds attribute (test churn, it's not used by llc any more)
- adjusting the dynamic alignment check to not use symbol names

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D155190
2023-07-13 23:54:38 +01:00

87 lines
3.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=hybrid < %s | FileCheck %s
;; Reduced from a larger test case. Checks that functions and kernels that use only dynamic LDS
;; are lowered successfully. Previously they only worked if the kernel happened to also use static LDS
;; variables; that was an artefact of implementing dynamic variables by adapting the existing code for static ones.
;; Two external LDS (addrspace(3)) globals exercised by this test.
;; @A has a fixed size (eight pointers) and is only referenced from @store_A.
;; @B is a zero-length array and is only referenced from @get_B_ptr; the expected
;; output for that function goes through the "dynlds" offset table, so @B plays the
;; role of the dynamic LDS variable mentioned in the header comment.
@A = external addrspace(3) global [8 x ptr]
@B = external addrspace(3) global [0 x i32]
;; Kernel that reaches @A only through a two-deep call chain
;; (kernel_0 -> call_store_A -> store_A); it never names an LDS variable itself.
;; Expected output: the kernel gains attribute group #0, an
;; !llvm.amdgcn.lds.kernel.id metadata node, and a leading llvm.donothing call
;; whose "ExplicitUse" operand bundle references @llvm.amdgcn.kernel.kernel_0.lds.
;; The original call is kept unchanged after it.
define amdgpu_kernel void @kernel_0() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_0() #0 !llvm.amdgcn.lds.kernel.id !1 {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_0.lds) ]
; CHECK-NEXT: call void @call_store_A()
; CHECK-NEXT: ret void
;
call void @call_store_A()
ret void
}
;; Kernel that reaches only @B, via a call to @get_B_ptr whose result is discarded.
;; It has no path to @A, so this is the "dynamic LDS only" case the test was reduced for.
;; Expected output: the kernel gains an !llvm.amdgcn.lds.kernel.id metadata node but
;; no attribute group, and a leading llvm.donothing call whose "ExplicitUse" operand
;; bundle references @llvm.amdgcn.kernel_1.dynlds.
define amdgpu_kernel void @kernel_1() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_1() !llvm.amdgcn.lds.kernel.id !2 {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_1.dynlds) ]
; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr()
; CHECK-NEXT: ret void
;
%ptr = call ptr @get_B_ptr()
ret void
}
;; Kernel that reaches @A through a single call (kernel_2 -> store_A), in contrast
;; to kernel_0 which goes through the extra @call_store_A hop.
;; Expected output mirrors kernel_0: attribute group #0, an
;; !llvm.amdgcn.lds.kernel.id metadata node, and a leading llvm.donothing call
;; whose "ExplicitUse" operand bundle references @llvm.amdgcn.kernel.kernel_2.lds.
define amdgpu_kernel void @kernel_2() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_2() #0 !llvm.amdgcn.lds.kernel.id !3 {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_2.lds) ]
; CHECK-NEXT: call void @store_A()
; CHECK-NEXT: ret void
;
call void @store_A()
ret void
}
;; Second kernel that reaches only @B through @get_B_ptr; same shape as kernel_1.
;; Expected output: an !llvm.amdgcn.lds.kernel.id metadata node, no attribute group,
;; and a leading llvm.donothing call whose "ExplicitUse" operand bundle references
;; this kernel's own @llvm.amdgcn.kernel_3.dynlds symbol.
define amdgpu_kernel void @kernel_3() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_3() !llvm.amdgcn.lds.kernel.id !4 {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_3.dynlds) ]
; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr()
; CHECK-NEXT: ret void
;
%ptr = call ptr @get_B_ptr()
ret void
}
;; Non-kernel helper that only forwards to @store_A, adding one level of call
;; indirection between kernel_0 and the function that actually touches @A.
;; It references no LDS variable directly, and the expected output is identical
;; to the input.
define private void @call_store_A() {
; CHECK-LABEL: define private void @call_store_A() {
; CHECK-NEXT: call void @store_A()
; CHECK-NEXT: ret void
;
call void @store_A()
ret void
}
;; Non-kernel function that is the only direct user of @A: it stores @A's address,
;; cast to the generic address space, through a null pointer.
;; NOTE(review): the null destination looks like a reduction artefact that merely
;; keeps @A used; confirm if this test is ever extended.
;; Expected output: the direct reference to @A is replaced by a lookup. The function
;; calls llvm.amdgcn.lds.kernel.id, uses the result to index
;; @llvm.amdgcn.lds.offset.table (typed [4 x [1 x i32]]; presumably one row per
;; kernel in this file and one column for @A), loads the i32 entry, converts it to
;; an addrspace(3) pointer with inttoptr, and then performs the original
;; addrspacecast and store on that pointer.
define private void @store_A() {
; CHECK-LABEL: define private void @store_A() {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [4 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[A]], align 4
; CHECK-NEXT: [[A1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[A1]] to ptr
; CHECK-NEXT: store ptr [[TMP3]], ptr null, align 8
; CHECK-NEXT: ret void
;
store ptr addrspacecast (ptr addrspace(3) @A to ptr), ptr null
ret void
}
;; Non-kernel function that is the only direct user of @B: it returns @B's address
;; cast to the generic address space.
;; Expected output: the direct reference to @B is replaced by a lookup. The function
;; calls llvm.amdgcn.lds.kernel.id, uses the result to index
;; @llvm.amdgcn.dynlds.offset.table (typed [4 x i32]; presumably one entry per
;; kernel in this file), loads the i32 entry, converts it to an addrspace(3) pointer
;; with inttoptr, and returns the addrspacecast of that pointer.
define private ptr @get_B_ptr() {
; CHECK-LABEL: define private ptr @get_B_ptr() {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [4 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[B]], align 4
; CHECK-NEXT: [[B1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[B1]] to ptr
; CHECK-NEXT: ret ptr [[TMP3]]
;
ret ptr addrspacecast (ptr addrspace(3) @B to ptr)
}
;; Attribute group #0 is the one expected on kernel_0 and kernel_2, the two kernels
;; that can reach @A. It records an LDS frame size of 64, which presumably is the
;; size of @A (eight pointers at eight bytes each); confirm against the data layout.
; CHECK: attributes #0 = { "amdgpu-lds-size"="64" }