Files
clang-p2996/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
Matt Arsenault e630d9b299 AMDGPU/clang: Remove target features from address space test builtins
It turns out we can codegen these on targets without flat addressing,
although the runtime probably didn't put anything useful there. The
proper diagnostic would be to disallow flat pointer uses or languages
with them, not this one edge case. Allows removing one of the special
cases requiring subtarget support in the device libraries.
2022-12-29 18:46:41 -05:00

54 lines
2.0 KiB
LLVM

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; GCN-LABEL: {{^}}is_local_vgpr:
; GCN-DAG: {{flat|global|buffer}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]]
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
; GFX9: v_cmp_eq_u32_e32 vcc, s[[HI]], v[[PTR_HI]]
; CI: v_cmp_eq_u32_e32 vcc, [[APERTURE]], v[[PTR_HI]]
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
; Kernel under test: every work-item picks its own slot in %ptr.ptr, loads a
; generic pointer from it, asks llvm.amdgcn.is.shared about that pointer and
; stores the answer widened to i32.
define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
  ; Index the pointer table by the work-item id so the address differs per lane.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds ptr, ptr addrspace(1) %ptr.ptr, i32 %tid

  ; The pointer being classified comes from a volatile load.
  %flat.ptr = load volatile ptr, ptr addrspace(1) %slot
  %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %flat.ptr)

  ; Widen the i1 result and store it; the destination pointer is undef.
  %is.shared.ext = zext i1 %is.shared to i32
  store i32 %is.shared.ext, ptr addrspace(1) undef
  ret void
}
; FIXME: (setcc (zero_extend (setcc)), 1) is not folded out, resulting in a
; select and a vcc branch.
; GCN-LABEL: {{^}}is_local_sgpr:
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
; CI-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[6:7], 0x1{{$}}
; GFX9-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[4:5], 0x4{{$}}
; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
; GFX9: s_cmp_eq_u32 [[PTR_HI]], s[[HI]]
; CI: s_cmp_eq_u32 [[PTR_HI]], [[APERTURE]]
; GCN: s_cbranch_vccnz
; Kernel under test: the pointer to classify is the kernel argument itself.
; The i1 from llvm.amdgcn.is.shared feeds a conditional branch directly; the
; taken path does a volatile store of 0 before both paths meet at the return.
define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
  %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
  br i1 %is.shared, label %bb0, label %bb1

bb0:
  ; Reached only when the intrinsic returned true.
  store volatile i32 0, ptr addrspace(1) undef
  br label %bb1

bb1:
  ret void
}
; Intrinsic declarations used by the two kernels in this file.
; Work-item id intrinsic (x dimension, going by its name); returns an i32.
declare i32 @llvm.amdgcn.workitem.id.x() #0
; Address-space test intrinsic exercised by both kernels: takes a ptr and
; returns an i1.
declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #0
; Attribute group referenced by both declarations above.
attributes #0 = { nounwind readnone speculatable }