Files
clang-p2996/llvm/test/CodeGen/NVPTX/rsqrt.ll
Alex MacLean df60805123 [NVPTX] Improve support for rsqrt.approx (#89417)
Complete support for rsqrt.approx with rsqrt.approx.f64 ([PTX ISA
9.7.3.17. Floating Point Instructions:
rsqrt.approx.ftz.f64](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rsqrt-approx-ftz-f64)).
Additionally, add support for folding `sqrt` into `rsqrt`, with an
optional flag to disable this folding.
2024-04-23 08:56:39 -07:00

36 lines
1.1 KiB
LLVM

; Lit driver for this test: the first command compiles this file with llc for
; the nvptx64 target and pipes the output to FileCheck; the second command, only
; when the ptxas feature is available, pipes the same llc output to the
; ptxas-verify substitution.
; RUN: llc < %s -march=nvptx64 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
; test1: the f32 approximate reciprocal-square-root intrinsic (no ftz variant)
; is expected to appear in the output as rsqrt.approx.f32.
; CHECK-LABEL: .func{{.*}}test1
define float @test1(float %x) local_unnamed_addr {
; CHECK: rsqrt.approx.f32
  %rsqrt = call float @llvm.nvvm.rsqrt.approx.f(float %x)
  ret float %rsqrt
}
; test2: the f64 approximate reciprocal-square-root intrinsic (no ftz variant)
; is expected to appear in the output as rsqrt.approx.f64.
; CHECK-LABEL: .func{{.*}}test2
define double @test2(double %x) local_unnamed_addr {
; CHECK: rsqrt.approx.f64
  %rsqrt = call double @llvm.nvvm.rsqrt.approx.d(double %x)
  ret double %rsqrt
}
; test3: the f32 ftz approximate reciprocal-square-root intrinsic is expected
; to appear in the output as rsqrt.approx.ftz.f32.
; CHECK-LABEL: .func{{.*}}test3
define float @test3(float %x) local_unnamed_addr {
; CHECK: rsqrt.approx.ftz.f32
  %rsqrt = tail call float @llvm.nvvm.rsqrt.approx.ftz.f(float %x)
  ret float %rsqrt
}
; test4: the f64 ftz approximate reciprocal-square-root intrinsic is expected
; to appear in the output as rsqrt.approx.ftz.f64.
; CHECK-LABEL: .func{{.*}}test4
define double @test4(double %x) local_unnamed_addr {
; CHECK: rsqrt.approx.ftz.f64
  %rsqrt = tail call double @llvm.nvvm.rsqrt.approx.ftz.d(double %x)
  ret double %rsqrt
}
; NVVM rsqrt.approx intrinsics called by the tests in this file, listed in the
; order they are used (test1 through test4).
declare float @llvm.nvvm.rsqrt.approx.f(float)
declare double @llvm.nvvm.rsqrt.approx.d(double)
declare float @llvm.nvvm.rsqrt.approx.ftz.f(float)
declare double @llvm.nvvm.rsqrt.approx.ftz.d(double)