Files
clang-p2996/llvm/test/CodeGen/AMDGPU/ptrmask.ll
Nikita Popov eb86de63d9 [IR] Require that ptrmask mask matches pointer index size (#69343)
Currently, we specify that the ptrmask intrinsic allows the mask to have
any size, which will be zero-extended or truncated to the pointer size.

However, what semantics of the specified GEP expansion actually imply is
that the mask is only meaningful up to the pointer type *index* size --
any higher bits of the pointer will always be preserved. In other words,
the mask gets 1-extended from the index size to the pointer size. This
is also the behavior we want for CHERI architectures.

This PR makes two changes:
* It spells out the interaction with the pointer type index size more
explicitly.
* It requires that the mask matches the pointer type index size. The
intention here is to make handling of this intrinsic more robust, to
avoid accidental mix-ups of pointer size and index size in code
generating this intrinsic. If a zero-extend or truncate of the mask is
desired, it should just be done explicitly in IR. This also cuts down on
the amount of testing we have to do, and things transforms needs to
check for.

As far as I can tell, we don't actually support pointers with different
index type size at the SDAG level, so I'm just asserting the sizes match
there for now. Out-of-tree targets using different index sizes may need
to adjust that code.
2023-10-24 09:54:29 +02:00

75 lines
3.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
define ptr addrspace(1) @v_ptrmask_global_variable_i64(ptr addrspace(1) %ptr, i64 %mask) {
; GCN-LABEL: v_ptrmask_global_variable_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v1, v1, v3
; GCN-NEXT: v_and_b32_e32 v0, v0, v2
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ptrmask_global_variable_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2
; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v3
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%masked = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) %ptr, i64 %mask)
ret ptr addrspace(1) %masked
}
define ptr addrspace(3) @v_ptrmask_local_variable_i32(ptr addrspace(3) %ptr, i32 %mask) {
; GCN-LABEL: v_ptrmask_local_variable_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ptrmask_local_variable_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%masked = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) %ptr, i32 %mask)
ret ptr addrspace(3) %masked
}
define amdgpu_ps ptr addrspace(1) @s_ptrmask_global_variable_i64(ptr addrspace(1) inreg %ptr, i64 inreg %mask) {
; GCN-LABEL: s_ptrmask_global_variable_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_global_variable_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
; GFX10PLUS-NEXT: ; return to shader part epilog
%masked = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) %ptr, i64 %mask)
ret ptr addrspace(1) %masked
}
define amdgpu_ps ptr addrspace(3) @s_ptrmask_local_variable_i32(ptr addrspace(3) inreg %ptr, i32 inreg %mask) {
; GCN-LABEL: s_ptrmask_local_variable_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_and_b32 s0, s2, s3
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_local_variable_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_and_b32 s0, s2, s3
; GFX10PLUS-NEXT: ; return to shader part epilog
%masked = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) %ptr, i32 %mask)
ret ptr addrspace(3) %masked
}
declare ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3), i32) #0
declare ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1), i64) #0
attributes #0 = { nounwind readnone speculatable willreturn }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX11: {{.*}}