Summary: This patch adds an intrinsic that returns the value of the PTX 'activemask' instruction, so that callers no longer need inline assembly to obtain it. See the relevant PTX reference for details: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-activemask
39 lines
961 B
LLVM
39 lines
961 B
LLVM
; RUN: llc < %s -march=nvptx64 -O2 -mcpu=sm_52 -mattr=+ptx62 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_52 -mattr=+ptx62 | %ptxas-verify %}

; Intrinsic under test: takes no arguments and yields an i32.
declare i32 @llvm.nvvm.activemask()

; Straight-line case: call the intrinsic once and return its result unchanged.
; The directives below expect a single activemask.b32 whose destination
; register is stored directly to the return-value slot, immediately followed
; by the return.
; CHECK-LABEL: activemask(
;
; CHECK: activemask.b32 %[[REG:.+]];
; CHECK-NEXT: st.param.b32 [func_retval0+0], %[[REG]];
; CHECK-NEXT: ret;
define dso_local i32 @activemask() {
entry:
  %mask = call i32 @llvm.nvvm.activemask()
  ret i32 %mask
}

; Branching case: call the intrinsic once on each side of a branch on %cond
; and return the value from whichever side executed (merged by the phi in
; if.end). The directives below expect two separate activemask.b32
; instructions in the output, both writing the register that is later stored
; to the return-value slot.
; NOTE(review): presumably this guards against the two calls being merged or
; moved across the branch, as the function name suggests -- confirm against
; the intrinsic's definition.
; CHECK-LABEL: convergent(
;
; CHECK: activemask.b32 %[[REG:.+]];
; CHECK: activemask.b32 %[[REG]];
; CHECK: .param.b32 [func_retval0+0], %[[REG]];
; CHECK-NEXT: ret;
define dso_local i32 @convergent(i1 %cond) {
entry:
  ; Successor order is deliberate in the original: a true %cond goes to if.else.
  br i1 %cond, label %if.else, label %if.then

if.then:
  %0 = call i32 @llvm.nvvm.activemask()
  br label %if.end

if.else:
  %1 = call i32 @llvm.nvvm.activemask()
  br label %if.end

if.end:
  ; Select the mask produced on the path that was actually taken.
  %mask = phi i32 [ %0, %if.then ], [ %1, %if.else ]
  ret i32 %mask
}