Use IR analysis to infer when an `addrspacecast` operand is nonnull, then lower it to an intrinsic that the DAG can use to skip the null check. I chose an intrinsic because it's non-intrusive. An alternative would have been to allow something like `!nonnull` metadata on `addrspacecast` and lower that to a custom opcode (or add an operand to the addrspacecast MIR/DAG opcodes), but that's a lot of boilerplate for just one target's use case, IMO. I'm hoping that once we switch to GlobalISel we can move all of this logic to the MIR level without losing information; currently the DAG doesn't see enough, so we need to act in CodeGenPrepare (CGP).

Fixes: SWDEV-316445
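To illustrate the intended transform, here is a minimal sketch in LLVM IR (not taken from the patch itself): the `@sketch_*` function names and the use of a `nonnull` parameter attribute as the source of the nonnull fact are illustrative assumptions; the intrinsic name matches the declarations in the test below.

```llvm
; Before CGP: the operand is known nonnull, but the generic addrspacecast
; lowering would still emit the compare-and-select null guard.
define void @sketch_before(ptr addrspace(3) nonnull %ptr) {
  %flat = addrspacecast ptr addrspace(3) %ptr to ptr
  store volatile i32 7, ptr %flat, align 4
  ret void
}

; After CGP: the cast is rewritten to the intrinsic, which both ISels can
; lower as a plain aperture-based conversion with no null check.
define void @sketch_after(ptr addrspace(3) nonnull %ptr) {
  %flat = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) %ptr)
  store volatile i32 7, ptr %flat, align 4
  ret void
}

declare ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3))
```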
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,DAGISEL-ASM
; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,GISEL-ASM

define void @local_to_flat(ptr addrspace(3) %ptr) {
; ASM-LABEL: local_to_flat:
; ASM:       ; %bb.0:
; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT:    s_mov_b64 s[4:5], src_shared_base
; ASM-NEXT:    v_mov_b32_e32 v1, s5
; ASM-NEXT:    v_mov_b32_e32 v2, 7
; ASM-NEXT:    flat_store_dword v[0:1], v2
; ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; ASM-NEXT:    s_setpc_b64 s[30:31]
  %1 = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) %ptr)
  store volatile i32 7, ptr %1, align 4
  ret void
}

define void @private_to_flat(ptr addrspace(5) %ptr) {
; ASM-LABEL: private_to_flat:
; ASM:       ; %bb.0:
; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT:    s_mov_b64 s[4:5], src_private_base
; ASM-NEXT:    v_mov_b32_e32 v1, s5
; ASM-NEXT:    v_mov_b32_e32 v2, 7
; ASM-NEXT:    flat_store_dword v[0:1], v2
; ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; ASM-NEXT:    s_setpc_b64 s[30:31]
  %1 = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) %ptr)
  store volatile i32 7, ptr %1, align 4
  ret void
}

define void @flat_to_local(ptr %ptr) {
; ASM-LABEL: flat_to_local:
; ASM:       ; %bb.0:
; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT:    v_mov_b32_e32 v1, 7
; ASM-NEXT:    ds_write_b32 v0, v1
; ASM-NEXT:    s_waitcnt lgkmcnt(0)
; ASM-NEXT:    s_setpc_b64 s[30:31]
  %1 = call ptr addrspace(3) @llvm.amdgcn.addrspacecast.nonnull.p3.p0(ptr %ptr)
  store volatile i32 7, ptr addrspace(3) %1, align 4
  ret void
}

define void @flat_to_private(ptr %ptr) {
; ASM-LABEL: flat_to_private:
; ASM:       ; %bb.0:
; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT:    v_mov_b32_e32 v1, 7
; ASM-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; ASM-NEXT:    s_waitcnt vmcnt(0)
; ASM-NEXT:    s_setpc_b64 s[30:31]
  %1 = call ptr addrspace(5) @llvm.amdgcn.addrspacecast.nonnull.p5.p0(ptr %ptr)
  store volatile i32 7, ptr addrspace(5) %1, align 4
  ret void
}

declare ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3))
declare ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5))
declare ptr addrspace(3) @llvm.amdgcn.addrspacecast.nonnull.p3.p0(ptr)
declare ptr addrspace(5) @llvm.amdgcn.addrspacecast.nonnull.p5.p0(ptr)

declare <4 x ptr> @llvm.amdgcn.addrspacecast.nonnull.v4p0.v4p3(<4 x ptr addrspace(3)>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; DAGISEL-ASM: {{.*}}
; GISEL-ASM: {{.*}}