Add support for '`llvm.nvvm.flo.[su].*`' intrinsics which correspond to a PTX `bfind` instruction. See [PTX ISA 9.7.1.16. Integer Arithmetic Instructions: bfind] (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-bfind) The '`llvm.nvvm.flo.u`' family of intrinsics identifies the bit position of the leading one, returning either it's offset from the most or least significant bit. The '`llvm.nvvm.flo.s`' family of intrinsics identifies the bit position of the leading non-sign bit, returning either it's offset from the most or least significant bit.
133 lines
3.5 KiB
LLVM
133 lines
3.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s | FileCheck %s
; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}

; Codegen test for the llvm.nvvm.flo.{s,u}.{i32,i64} intrinsics. Each function
; below calls one intrinsic with a constant i1 flag and pins the PTX `bfind`
; variant that llc is expected to emit for it. The second RUN line additionally
; feeds the generated PTX to ptxas when the lit configuration provides it.

target triple = "nvptx64-nvidia-cuda"

; llvm.nvvm.flo.s.i32 with the i1 flag set to false: the expected lowering is a
; plain `bfind.s32` on the loaded 32-bit parameter.
define i32 @flo_1(i32 %a) {
; CHECK-LABEL: flo_1(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [flo_1_param_0];
; CHECK-NEXT:    bfind.s32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %r = call i32 @llvm.nvvm.flo.s.i32(i32 %a, i1 false)
  ret i32 %r
}

; llvm.nvvm.flo.s.i32 with the i1 flag set to true: the expected lowering is the
; `.shiftamt` form, `bfind.shiftamt.s32`.
define i32 @flo_2(i32 %a) {
; CHECK-LABEL: flo_2(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [flo_2_param_0];
; CHECK-NEXT:    bfind.shiftamt.s32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %r = call i32 @llvm.nvvm.flo.s.i32(i32 %a, i1 true)
  ret i32 %r
}

; llvm.nvvm.flo.u.i32 with the i1 flag set to false: the expected lowering is a
; plain `bfind.u32` on the loaded 32-bit parameter.
define i32 @flo_3(i32 %a) {
; CHECK-LABEL: flo_3(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [flo_3_param_0];
; CHECK-NEXT:    bfind.u32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %r = call i32 @llvm.nvvm.flo.u.i32(i32 %a, i1 false)
  ret i32 %r
}

; llvm.nvvm.flo.u.i32 with the i1 flag set to true: the expected lowering is the
; `.shiftamt` form, `bfind.shiftamt.u32`.
define i32 @flo_4(i32 %a) {
; CHECK-LABEL: flo_4(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [flo_4_param_0];
; CHECK-NEXT:    bfind.shiftamt.u32 %r2, %r1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %r = call i32 @llvm.nvvm.flo.u.i32(i32 %a, i1 true)
  ret i32 %r
}

; llvm.nvvm.flo.s.i64 with the i1 flag set to false: the expected lowering is a
; plain `bfind.s64`. The operand lives in a 64-bit register while the result is
; written to a 32-bit register, matching the intrinsic's i32 return type.
define i32 @flo_5(i64 %a) {
; CHECK-LABEL: flo_5(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u64 %rd1, [flo_5_param_0];
; CHECK-NEXT:    bfind.s64 %r1, %rd1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %r = call i32 @llvm.nvvm.flo.s.i64(i64 %a, i1 false)
  ret i32 %r
}

; llvm.nvvm.flo.s.i64 with the i1 flag set to true: the expected lowering is the
; `.shiftamt` form, `bfind.shiftamt.s64`, with a 64-bit source register and a
; 32-bit destination register.
define i32 @flo_6(i64 %a) {
; CHECK-LABEL: flo_6(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u64 %rd1, [flo_6_param_0];
; CHECK-NEXT:    bfind.shiftamt.s64 %r1, %rd1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %r = call i32 @llvm.nvvm.flo.s.i64(i64 %a, i1 true)
  ret i32 %r
}

; llvm.nvvm.flo.u.i64 with the i1 flag set to false: the expected lowering is a
; plain `bfind.u64`, with a 64-bit source register and a 32-bit destination
; register.
define i32 @flo_7(i64 %a) {
; CHECK-LABEL: flo_7(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u64 %rd1, [flo_7_param_0];
; CHECK-NEXT:    bfind.u64 %r1, %rd1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %r = call i32 @llvm.nvvm.flo.u.i64(i64 %a, i1 false)
  ret i32 %r
}

; llvm.nvvm.flo.u.i64 with the i1 flag set to true: the expected lowering is the
; `.shiftamt` form, `bfind.shiftamt.u64`, with a 64-bit source register and a
; 32-bit destination register.
define i32 @flo_8(i64 %a) {
; CHECK-LABEL: flo_8(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u64 %rd1, [flo_8_param_0];
; CHECK-NEXT:    bfind.shiftamt.u64 %r1, %rd1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %r = call i32 @llvm.nvvm.flo.u.i64(i64 %a, i1 true)
  ret i32 %r
}

; Intrinsics exercised by the functions in this file. The `.s` / `.u` infix and
; the `.i32` / `.i64` suffix select the variant; every variant takes the value
; plus an i1 flag and returns i32, including the 64-bit ones.
declare i32 @llvm.nvvm.flo.s.i32(i32, i1)
declare i32 @llvm.nvvm.flo.u.i32(i32, i1)
declare i32 @llvm.nvvm.flo.s.i64(i64, i1)
declare i32 @llvm.nvvm.flo.u.i64(i64, i1)