Currently, we specify that the ptrmask intrinsic allows the mask to have any size, which will be zero-extended or truncated to the pointer size. However, what the semantics of the specified GEP expansion actually imply is that the mask is only meaningful up to the pointer type *index* size -- any higher bits of the pointer will always be preserved. In other words, the mask gets 1-extended from the index size to the pointer size. This is also the behavior we want for CHERI architectures. This PR makes two changes: * It spells out the interaction with the pointer type index size more explicitly. * It requires that the mask matches the pointer type index size. The intention here is to make handling of this intrinsic more robust, to avoid accidental mix-ups of pointer size and index size in code generating this intrinsic. If a zero-extend or truncate of the mask is desired, it should just be done explicitly in IR. This also cuts down on the amount of testing we have to do, and on the things transforms need to check for. As far as I can tell, we don't actually support pointers with a different index type size at the SDAG level, so I'm just asserting the sizes match there for now. Out-of-tree targets using different index sizes may need to adjust that code.
75 lines
3.2 KiB
LLVM
75 lines
3.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GCN %s
|
|
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
|
|
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
|
|
|
|
; Test: llvm.ptrmask applied to an addrspace(1) pointer with a non-constant
; i64 mask; both values arrive as ordinary (non-inreg) function arguments.
; The checks expect the mask to be applied with two v_and_b32 instructions
; (v0 & v2 and v1 & v3) followed by s_setpc_b64. The GCN and GFX10PLUS
; expectations differ only in the order in which the two ANDs are emitted.
define ptr addrspace(1) @v_ptrmask_global_variable_i64(ptr addrspace(1) %ptr, i64 %mask) {
|
|
; GCN-LABEL: v_ptrmask_global_variable_i64:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_and_b32_e32 v1, v1, v3
|
|
; GCN-NEXT: v_and_b32_e32 v0, v0, v2
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10PLUS-LABEL: v_ptrmask_global_variable_i64:
|
|
; GFX10PLUS: ; %bb.0:
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2
|
|
; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v3
|
|
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
|
|
%masked = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) %ptr, i64 %mask)
|
|
ret ptr addrspace(1) %masked
|
|
}
|
|
|
|
; Test: llvm.ptrmask applied to an addrspace(3) pointer with a non-constant
; i32 mask; both values arrive as ordinary (non-inreg) function arguments.
; Every checked prefix expects a single v_and_b32 of v0 with v1, followed by
; s_setpc_b64.
define ptr addrspace(3) @v_ptrmask_local_variable_i32(ptr addrspace(3) %ptr, i32 %mask) {
|
|
; GCN-LABEL: v_ptrmask_local_variable_i32:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_and_b32_e32 v0, v0, v1
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GFX10PLUS-LABEL: v_ptrmask_local_variable_i32:
|
|
; GFX10PLUS: ; %bb.0:
|
|
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
|
|
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
|
|
%masked = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) %ptr, i32 %mask)
|
|
ret ptr addrspace(3) %masked
|
|
}
|
|
|
|
; Test: llvm.ptrmask applied to an addrspace(1) pointer with a non-constant
; i64 mask in an amdgpu_ps function whose arguments are both marked inreg.
; Every checked prefix expects a single s_and_b64 of s[2:3] with s[4:5],
; with the result written to s[0:1].
define amdgpu_ps ptr addrspace(1) @s_ptrmask_global_variable_i64(ptr addrspace(1) inreg %ptr, i64 inreg %mask) {
|
|
; GCN-LABEL: s_ptrmask_global_variable_i64:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
|
|
; GCN-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: s_ptrmask_global_variable_i64:
|
|
; GFX10PLUS: ; %bb.0:
|
|
; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
%masked = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) %ptr, i64 %mask)
|
|
ret ptr addrspace(1) %masked
|
|
}
|
|
|
|
; Test: llvm.ptrmask applied to an addrspace(3) pointer with a non-constant
; i32 mask in an amdgpu_ps function whose arguments are both marked inreg.
; Every checked prefix expects a single s_and_b32 of s2 with s3, with the
; result written to s0.
define amdgpu_ps ptr addrspace(3) @s_ptrmask_local_variable_i32(ptr addrspace(3) inreg %ptr, i32 inreg %mask) {
|
|
; GCN-LABEL: s_ptrmask_local_variable_i32:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_and_b32 s0, s2, s3
|
|
; GCN-NEXT: ; return to shader part epilog
|
|
;
|
|
; GFX10PLUS-LABEL: s_ptrmask_local_variable_i32:
|
|
; GFX10PLUS: ; %bb.0:
|
|
; GFX10PLUS-NEXT: s_and_b32 s0, s2, s3
|
|
; GFX10PLUS-NEXT: ; return to shader part epilog
|
|
%masked = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) %ptr, i32 %mask)
|
|
ret ptr addrspace(3) %masked
|
|
}
|
|
|
|
; Declarations of the two llvm.ptrmask overloads called by the tests above:
; an addrspace(3) pointer with an i32 mask and an addrspace(1) pointer with
; an i64 mask. Both share attribute group #0.
; NOTE(review): the mask widths presumably equal the pointer index size of
; each address space -- verify against the target datalayout.
declare ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3), i32) #0
|
|
declare ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1), i64) #0
|
|
|
|
; Attribute group applied to both ptrmask declarations.
attributes #0 = { nounwind readnone speculatable willreturn }
|
|
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
|
; GFX10: {{.*}}
|
|
; GFX11: {{.*}}
|