AMDGPU: Add range to wavefrontsize intrinsic declaration (#136303)

This commit is contained in:
Matt Arsenault
2025-04-25 10:19:47 +02:00
committed by GitHub
parent ada4ad9d1f
commit dadea96791
3 changed files with 15 additions and 5 deletions

View File

@@ -234,9 +234,11 @@ def int_amdgcn_init_exec_from_input : Intrinsic<[],
// Takes no operands and produces a single i1 result. Declared as having side
// effects, not accessing memory, and convergent.
def int_amdgcn_init_whole_wave : Intrinsic<[llvm_i1_ty], [], [
IntrHasSideEffects, IntrNoMem, IntrConvergent]>;
// NOTE(review): this appears to be the pre-change side of the diff. In this
// form the i32 result is marked noundef but carries no range constraint.
def int_amdgcn_wavefrontsize :
ClangBuiltin<"__builtin_amdgcn_wavefrontsize">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [NoUndef<RetIndex>, IntrNoMem, IntrSpeculatable]>;
// Wavefront-size query, exposed to Clang as __builtin_amdgcn_wavefrontsize.
// Takes no operands and produces an i32 that is noundef and constrained to
// range(i32 32, 65). LLVM range bounds are half-open, so the upper bound 65 is
// exclusive and 64 is the largest value admitted.
def int_amdgcn_wavefrontsize
: ClangBuiltin<"__builtin_amdgcn_wavefrontsize">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [],
[NoUndef<RetIndex>, Range<RetIndex, 32, 65>,
IntrNoMem, IntrSpeculatable]>;
// Represent a relocation constant.
def int_amdgcn_reloc_constant : DefaultAttrsIntrinsic<

View File

@@ -18,4 +18,12 @@ define i32 @ds_consume(ptr addrspace(3) %ptr) {
ret i32 %ret
}
; Test assumed range: the printed declaration of llvm.amdgcn.wavefrontsize is
; expected to carry the noundef and range(i32 32, 65) return attributes, even
; though the call in the function below does not spell them out.
; CHECK: declare noundef range(i32 32, 65) i32 @llvm.amdgcn.wavefrontsize() #1
define i32 @wavefrontsize() {
%ret = call i32 @llvm.amdgcn.wavefrontsize()
ret i32 %ret
}
; CHECK: attributes #0 = { convergent nocallback nofree nounwind willreturn memory(argmem: readwrite) }
; CHECK: attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

View File

@@ -39,7 +39,7 @@ define amdgpu_kernel void @fold_and_optimize_wavefrontsize(ptr addrspace(1) noca
; OPT-SAME: ptr addrspace(1) captures(none) [[ARG:%.*]]) {
; OPT-NEXT: [[BB:.*:]]
; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR1]]
; OPT-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP]], 32
; OPT-NEXT: [[TMP1:%.*]] = icmp samesign ugt i32 [[TMP]], 32
; OPT-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 1
; OPT-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARG]], align 4
; OPT-NEXT: ret void
@@ -69,7 +69,7 @@ define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(ptr addrspace(1) n
; OPT-SAME: ptr addrspace(1) captures(none) [[ARG:%.*]]) {
; OPT-NEXT: [[BB:.*:]]
; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR1]]
; OPT-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP]], 32
; OPT-NEXT: [[TMP1:%.*]] = icmp samesign ugt i32 [[TMP]], 32
; OPT-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]]
; OPT: [[BB2]]:
; OPT-NEXT: store i32 1, ptr addrspace(1) [[ARG]], align 4