Files
clang-p2996/llvm/test/CodeGen/NVPTX/activemask.ll
Joseph Huber d492faa7aa [NVPTX] Add 'activemask' builtin and intrinsic support (#79768)
Summary:
This patch adds support for getting the 'activemask' instruction's value
without needing to use inline assembly. See the relevant PTX reference
for details.


https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-activemask
2024-01-29 14:07:30 -06:00

39 lines
961 B
LLVM

; Code generation test for the llvm.nvvm.activemask intrinsic on NVPTX.
;
; The first invocation compiles this file with llc for nvptx64 (sm_52, with
; the ptx62 attribute enabled, at -O2) and matches the emitted text against
; the expectations in this file using FileCheck.
; The second invocation is guarded by the ptxas condition; it compiles with
; the same target options and pipes the result to %ptxas-verify.
; RUN: llc < %s -march=nvptx64 -O2 -mcpu=sm_52 -mattr=+ptx62 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_52 -mattr=+ptx62 | %ptxas-verify %}
; Intrinsic under test: takes no arguments and returns an i32.
declare i32 @llvm.nvvm.activemask()
; Straight-line case: a single call to the intrinsic whose result is returned
; directly. The emitted code is expected to contain one activemask.b32,
; followed immediately by the store of that same register to func_retval0 and
; then the return.
; CHECK-LABEL: activemask(
;
; CHECK: activemask.b32 %[[REG:.+]];
; CHECK-NEXT: st.param.b32 [func_retval0+0], %[[REG]];
; CHECK-NEXT: ret;
define dso_local i32 @activemask() {
start:
  %result = call i32 @llvm.nvvm.activemask()
  ret i32 %result
}
; Branching case: each arm of the conditional calls the intrinsic and the two
; results are joined by a phi in if.end, which is then returned.
; The expectations below require two separate activemask.b32 instructions in
; the output, both writing the same register, and that register being stored
; to func_retval0 right before the return.
; NOTE(review): presumably this guards against the two calls being merged into
; one (as the function name suggests) -- confirm against the intrinsic's
; declared attributes.
; The return-value store is matched with its full opcode, the same way the
; single-call test in this file matches it.
; CHECK-LABEL: convergent(
;
; CHECK: activemask.b32 %[[REG:.+]];
; CHECK: activemask.b32 %[[REG]];
; CHECK: st.param.b32 [func_retval0+0], %[[REG]];
; CHECK-NEXT: ret;
define dso_local i32 @convergent(i1 %cond) {
entry:
  ; A true condition goes to if.else, a false one to if.then.
  br i1 %cond, label %if.else, label %if.then

if.then:
  %0 = call i32 @llvm.nvvm.activemask()
  br label %if.end

if.else:
  %1 = call i32 @llvm.nvvm.activemask()
  br label %if.end

if.end:
  ; Select whichever call's result belongs to the arm that was taken.
  %mask = phi i32 [ %0, %if.then ], [ %1, %if.else ]
  ret i32 %mask
}