From dadea967915db7ac33963c3874097e4b1a961ca8 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 25 Apr 2025 10:19:47 +0200 Subject: [PATCH] AMDGPU: Add range to wavefrontsize intrinsic declaration (#136303) --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 8 +++++--- llvm/test/Assembler/amdgcn-intrinsic-attributes.ll | 8 ++++++++ .../InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll | 4 ++-- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 75068717d9a5..a57eb4a6dba4 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -234,9 +234,11 @@ def int_amdgcn_init_exec_from_input : Intrinsic<[], def int_amdgcn_init_whole_wave : Intrinsic<[llvm_i1_ty], [], [ IntrHasSideEffects, IntrNoMem, IntrConvergent]>; -def int_amdgcn_wavefrontsize : - ClangBuiltin<"__builtin_amdgcn_wavefrontsize">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [], [NoUndef<RetIndex>, IntrNoMem, IntrSpeculatable]>; +def int_amdgcn_wavefrontsize + : ClangBuiltin<"__builtin_amdgcn_wavefrontsize">, + DefaultAttrsIntrinsic<[llvm_i32_ty], [], + [NoUndef<RetIndex>, Range<RetIndex, 32, 65>, + IntrNoMem, IntrSpeculatable]>; // Represent a relocation constant. 
def int_amdgcn_reloc_constant : DefaultAttrsIntrinsic< diff --git a/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll b/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll index bd5ce2ddda3e..744c94ac8541 100644 --- a/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll +++ b/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll @@ -18,4 +18,12 @@ define i32 @ds_consume(ptr addrspace(3) %ptr) { ret i32 %ret } +; Test assumed range +; CHECK: declare noundef range(i32 32, 65) i32 @llvm.amdgcn.wavefrontsize() #1 +define i32 @wavefrontsize() { + %ret = call i32 @llvm.amdgcn.wavefrontsize() + ret i32 %ret +} + ; CHECK: attributes #0 = { convergent nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll index 92f0af30b9e4..e065d96ad0ba 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll @@ -39,7 +39,7 @@ define amdgpu_kernel void @fold_and_optimize_wavefrontsize(ptr addrspace(1) noca ; OPT-SAME: ptr addrspace(1) captures(none) [[ARG:%.*]]) { ; OPT-NEXT: [[BB:.*:]] ; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR1]] -; OPT-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP]], 32 +; OPT-NEXT: [[TMP1:%.*]] = icmp samesign ugt i32 [[TMP]], 32 ; OPT-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 1 ; OPT-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARG]], align 4 ; OPT-NEXT: ret void @@ -69,7 +69,7 @@ define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(ptr addrspace(1) n ; OPT-SAME: ptr addrspace(1) captures(none) [[ARG:%.*]]) { ; OPT-NEXT: [[BB:.*:]] ; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR1]] -; OPT-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP]], 32 +; 
OPT-NEXT: [[TMP1:%.*]] = icmp samesign ugt i32 [[TMP]], 32 ; OPT-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]] ; OPT: [[BB2]]: ; OPT-NEXT: store i32 1, ptr addrspace(1) [[ARG]], align 4