Files
clang-p2996/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll
Jon Chesterfield e75ce77cd7 [amdgpu][lds] Fix missing markUsedByKernel calls and undef lookup table elements
More robust association between the kernels and lds struct.

Use poison instead of value() for lookup table elements introduced by dynamic lds lowering.

Extracted from D154946, new test from there verbatim. Segv fixed.

Fixes issues/63338

Fixes SWDEV-404491

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D154972
2023-07-12 00:37:21 +01:00

147 lines
7.6 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=module | FileCheck -check-prefixes=CHECK,MODULE %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=table | FileCheck -check-prefixes=CHECK,TABLE %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=kernel | FileCheck -check-prefixes=CHECK,K_OR_HY %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=hybrid | FileCheck -check-prefixes=CHECK,K_OR_HY %s
;; Same checks for kernel and for hybrid as an unambiguous reference to a variable - one where exactly one kernel
;; can reach it - is the case where hybrid lowering can always prefer the direct access.
;; A single kernel is the sole user of a single variable; all strategies codegen this as direct access to the kernel struct.
;; i8 variable in addrspace(3); within this file it is referenced only by kernel @k0.
@k0.lds = addrspace(3) global i8 undef
;; Kernel that loads @k0.lds, doubles the value and stores it back.
;; The expectations below use the prefix shared by all four RUN lines, so every
;; strategy must rewrite both accesses to @llvm.amdgcn.kernel.k0.lds.
define amdgpu_kernel void @k0() {
; CHECK-LABEL: @k0(
; CHECK-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 1
; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 2
; CHECK-NEXT: store i8 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 1
; CHECK-NEXT: ret void
;
;; Input IR: read-modify-write of the original variable.
%ld = load i8, ptr addrspace(3) @k0.lds
%mul = mul i8 %ld, 2
store i8 %mul, ptr addrspace(3) @k0.lds
ret void
}
;; Function is reachable from one kernel. Variable goes in module lds or the kernel struct, but never both.
;; i16 variable in addrspace(3); within this file it is accessed only from the
;; non-kernel function @f0, whose only caller here is kernel @k_f0.
@f0.lds = addrspace(3) global i16 undef
;; Non-kernel function that loads @f0.lds, multiplies by 3 and stores it back.
;; Expected rewrites per strategy:
;;  - module strategy: both accesses go through field index 1 of
;;    @llvm.amdgcn.module.lds and carry alias.scope / noalias metadata.
;;  - table strategy: each access separately indexes @llvm.amdgcn.lds.offset.table
;;    with the result of @llvm.amdgcn.lds.kernel.id() (column 1), loads an i32
;;    offset and converts it to an addrspace(3) pointer with inttoptr.
;;  - kernel and hybrid strategies: both accesses go directly to
;;    @llvm.amdgcn.kernel.k_f0.lds.
define void @f0() {
; MODULE-LABEL: @f0(
; MODULE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope [[META1:![0-9]+]], !noalias [[META4:![0-9]+]]
; MODULE-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
; MODULE-NEXT: store i16 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope [[META1]], !noalias [[META4]]
; MODULE-NEXT: ret void
;
; TABLE-LABEL: @f0(
; TABLE-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; TABLE-NEXT: [[F0_LDS2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; TABLE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[F0_LDS2]], align 4
; TABLE-NEXT: [[F0_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; TABLE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[F0_LDS3]], align 2
; TABLE-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
; TABLE-NEXT: [[F0_LDS:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; TABLE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[F0_LDS]], align 4
; TABLE-NEXT: [[F0_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; TABLE-NEXT: store i16 [[MUL]], ptr addrspace(3) [[F0_LDS1]], align 2
; TABLE-NEXT: ret void
;
; K_OR_HY-LABEL: @f0(
; K_OR_HY-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) @llvm.amdgcn.kernel.k_f0.lds, align 2
; K_OR_HY-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
; K_OR_HY-NEXT: store i16 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.k_f0.lds, align 2
; K_OR_HY-NEXT: ret void
;
;; Input IR: read-modify-write of the original variable.
%ld = load i16, ptr addrspace(3) @f0.lds
%mul = mul i16 %ld, 3
store i16 %mul, ptr addrspace(3) @f0.lds
ret void
}
;; Kernel whose body only calls @f0; it does not reference any LDS variable directly.
;; Every strategy is expected to insert a leading @llvm.donothing call carrying an
;; "ExplicitUse" operand bundle: the module strategy names @llvm.amdgcn.module.lds
;; (with alias.scope / noalias metadata on the call), while the table, kernel and
;; hybrid strategies name @llvm.amdgcn.kernel.k_f0.lds. The call to @f0 is unchanged.
define amdgpu_kernel void @k_f0() {
; MODULE-LABEL: @k_f0(
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META5:![0-9]+]], !noalias [[META1]]
; MODULE-NEXT: call void @f0()
; MODULE-NEXT: ret void
;
; TABLE-LABEL: @k_f0(
; TABLE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k_f0.lds) ]
; TABLE-NEXT: call void @f0()
; TABLE-NEXT: ret void
;
; K_OR_HY-LABEL: @k_f0(
; K_OR_HY-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k_f0.lds) ]
; K_OR_HY-NEXT: call void @f0()
; K_OR_HY-NEXT: ret void
;
;; Input IR: the kernel reaches @f0.lds only indirectly, through this call.
call void @f0()
ret void
}
;; As above, but with the kernel also using the variable.
;; i32 variable in addrspace(3); within this file it is accessed both by the
;; non-kernel function @f_both and by kernel @k0_both.
@both.lds = addrspace(3) global i32 undef
;; Non-kernel function that loads @both.lds, multiplies by 4 and stores it back.
;; Expected rewrites per strategy:
;;  - module strategy: both accesses use @llvm.amdgcn.module.lds directly (no
;;    getelementptr) and carry alias.scope / noalias metadata.
;;  - table strategy: each access separately indexes @llvm.amdgcn.lds.offset.table
;;    with the result of @llvm.amdgcn.lds.kernel.id() (column 0), loads an i32
;;    offset and converts it to an addrspace(3) pointer with inttoptr.
;;  - kernel and hybrid strategies: both accesses go directly to
;;    @llvm.amdgcn.kernel.k0_both.lds.
define void @f_both() {
; MODULE-LABEL: @f_both(
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]]
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]]
; MODULE-NEXT: ret void
;
; TABLE-LABEL: @f_both(
; TABLE-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; TABLE-NEXT: [[BOTH_LDS2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; TABLE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[BOTH_LDS2]], align 4
; TABLE-NEXT: [[BOTH_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; TABLE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) [[BOTH_LDS3]], align 4
; TABLE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
; TABLE-NEXT: [[BOTH_LDS:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; TABLE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[BOTH_LDS]], align 4
; TABLE-NEXT: [[BOTH_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; TABLE-NEXT: store i32 [[MUL]], ptr addrspace(3) [[BOTH_LDS1]], align 4
; TABLE-NEXT: ret void
;
; K_OR_HY-LABEL: @f_both(
; K_OR_HY-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds, align 4
; K_OR_HY-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
; K_OR_HY-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds, align 4
; K_OR_HY-NEXT: ret void
;
;; Input IR: read-modify-write of the original variable.
%ld = load i32, ptr addrspace(3) @both.lds
%mul = mul i32 %ld, 4
store i32 %mul, ptr addrspace(3) @both.lds
ret void
}
;; Kernel that loads @both.lds, multiplies by 5, stores it back and then calls @f_both.
;; Every strategy is expected to insert a leading @llvm.donothing call carrying an
;; "ExplicitUse" operand bundle.
;;  - module strategy: the bundle and both accesses name @llvm.amdgcn.module.lds;
;;    the accesses carry alias.scope / noalias metadata.
;;  - table, kernel and hybrid strategies: the bundle and both accesses name
;;    @llvm.amdgcn.kernel.k0_both.lds, i.e. the kernel's own accesses are direct
;;    even under the table strategy.
define amdgpu_kernel void @k0_both() {
; MODULE-LABEL: @k0_both(
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]]
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]]
; MODULE-NEXT: call void @f_both()
; MODULE-NEXT: ret void
;
; TABLE-LABEL: @k0_both(
; TABLE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds) ]
; TABLE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds, align 4
; TABLE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5
; TABLE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds, align 4
; TABLE-NEXT: call void @f_both()
; TABLE-NEXT: ret void
;
; K_OR_HY-LABEL: @k0_both(
; K_OR_HY-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds) ]
; K_OR_HY-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds, align 4
; K_OR_HY-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5
; K_OR_HY-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds, align 4
; K_OR_HY-NEXT: call void @f_both()
; K_OR_HY-NEXT: ret void
;
;; Input IR: direct read-modify-write of @both.lds followed by the indirect use via @f_both.
%ld = load i32, ptr addrspace(3) @both.lds
%mul = mul i32 %ld, 5
store i32 %mul, ptr addrspace(3) @both.lds
call void @f_both()
ret void
}