Files
clang-p2996/llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll
Jon Chesterfield d77ae7f251 [amdgpu] Reimplement LDS lowering
Renames the current lowering scheme to "module" and introduces two new
ones, "kernel" and "table", plus a "hybrid" that chooses between those three
on a per-variable basis.

Unit tests are set up to pass with the default lowering of "module" or "hybrid"
with this patch defaulting to "module", which will be a less dramatic codegen
change relative to the current. This reflects the sparsity of test coverage for
the table lowering method. Hybrid is better than module in every respect and
will be default in a subsequent patch.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D139433
2022-12-07 22:02:54 +00:00

53 lines
3.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=module | FileCheck -check-prefix=OPT %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=module | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s --amdgpu-lower-module-lds-strategy=module | FileCheck -check-prefix=GCN %s
; Check that module LDS is allocated at address 0 and kernel starts its
; allocation past module LDS when a call is present.
@lds.size.1.align.1 = internal unnamed_addr addrspace(3) global [1 x i8] undef, align 1
@lds.size.16.align.16 = internal unnamed_addr addrspace(3) global [16 x i8] undef, align 16
; GCN-LABEL: {{^}}k0:
; GCN-DAG: v_mov_b32_e32 [[NULL:v[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: ds_write_b8 [[NULL]], [[ONE]]
; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; GCN: ds_write_b8 [[NULL]], [[TWO]] offset:16
define amdgpu_kernel void @k0() {
; OPT-LABEL: @k0(
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"([[LLVM_AMDGCN_MODULE_LDS_T:%.*]] addrspace(3)* @llvm.amdgcn.module.lds) ]
; OPT-NEXT: [[LDS_SIZE_1_ALIGN_1_BC:%.*]] = bitcast [1 x i8] addrspace(3)* getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], [[LLVM_AMDGCN_MODULE_LDS_T]] addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0) to i8 addrspace(3)*
; OPT-NEXT: store i8 1, i8 addrspace(3)* [[LDS_SIZE_1_ALIGN_1_BC]], align 1
; OPT-NEXT: [[LDS_SIZE_16_ALIGN_16_BC:%.*]] = bitcast [16 x i8] addrspace(3)* getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K0_LDS_T:%.*]], [[LLVM_AMDGCN_KERNEL_K0_LDS_T]] addrspace(3)* @llvm.amdgcn.kernel.k0.lds, i32 0, i32 0) to i8 addrspace(3)*
; OPT-NEXT: store i8 2, i8 addrspace(3)* [[LDS_SIZE_16_ALIGN_16_BC]], align 16
; OPT-NEXT: call void @f0()
; OPT-NEXT: ret void
;
%lds.size.1.align.1.bc = bitcast [1 x i8] addrspace(3)* @lds.size.1.align.1 to i8 addrspace(3)*
store i8 1, i8 addrspace(3)* %lds.size.1.align.1.bc, align 1
%lds.size.16.align.16.bc = bitcast [16 x i8] addrspace(3)* @lds.size.16.align.16 to i8 addrspace(3)*
store i8 2, i8 addrspace(3)* %lds.size.16.align.16.bc, align 16
call void @f0()
ret void
}
; GCN-LABEL: {{^}}f0:
; GCN-DAG: v_mov_b32_e32 [[NULL:v[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[TREE:v[0-9]+]], 3
; GCN: ds_write_b8 [[NULL]], [[TREE]]
define void @f0() {
; OPT-LABEL: @f0() {
; OPT-NEXT: [[LDS_SIZE_1_ALIGN_1_BC:%.*]] = bitcast [1 x i8] addrspace(3)* getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], [[LLVM_AMDGCN_MODULE_LDS_T]] addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0) to i8 addrspace(3)*
; OPT-NEXT: store i8 3, i8 addrspace(3)* [[LDS_SIZE_1_ALIGN_1_BC]], align 1
; OPT-NEXT: ret void
;
%lds.size.1.align.1.bc = bitcast [1 x i8] addrspace(3)* @lds.size.1.align.1 to i8 addrspace(3)*
store i8 3, i8 addrspace(3)* %lds.size.1.align.1.bc, align 1
ret void
}
attributes #0 = { "amdgpu-elide-module-lds" }
; CHECK: attributes #0 = { "amdgpu-elide-module-lds" }