clang-p2996/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
Matt Arsenault e3fd8f83a8 AMDGPU: Correctly expand f64 sqrt intrinsic
rocm-device-libs and llpc have been avoiding the f64 sqrt
intrinsic in favor of their own expansions. Port the
expansion into the backend. Both of these users should be
updated to call the intrinsic instead.

The library and llpc expansions are slightly different.
llpc uses an ldexp to do the scale; the library uses a multiply.

Use ldexp to do the scale instead of the multiply.
I believe v_ldexp_f64 and v_mul_f64 are always the same number of
cycles, but it's cheaper to materialize the 32-bit integer constant
than the 64-bit double constant.

The libraries have another fast version of sqrt which will
be handled separately.

I am tempted to do this in an IR expansion instead. In the IR
we could take advantage of computeKnownFPClass to avoid
the 0-or-inf argument check.
2023-07-25 07:54:11 -04:00
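
For reference, here is a rough C transcription of the emitted sequence,
reconstructed from the CHECK lines below. The helper name and the
1.0/sqrt() stand-in for v_rsq_f64 are illustrative only; the real
expansion is a machine-instruction sequence, not libm calls.

#include <math.h>

/* Hypothetical C model of the backend's f64 sqrt expansion. */
double sqrt_f64_expansion(double x) {
  /* Inputs below 2^-767 are scaled up by 2^256 so the reciprocal
     square root estimate sees a normal-range value; the 32-bit
     exponent is cheaper to materialize than a 64-bit double. */
  int scaled = x < 0x1.0p-767;            /* v_cmp_gt_f64 vs. 2^-767 */
  double sx = ldexp(x, scaled ? 256 : 0); /* v_ldexp_f64 */

  double r = 1.0 / sqrt(sx);              /* stand-in for v_rsq_f64 */

  /* FMA-based Newton-Raphson refinement of the estimate. */
  double s = sx * r;                      /* ~sqrt(sx)         */
  double h = 0.5 * r;                     /* ~1/(2*sqrt(sx))   */
  double e = fma(-h, s, 0.5);
  s = fma(s, e, s);
  h = fma(h, e, h);
  double d = fma(-s, s, sx);              /* residual sx - s*s */
  s = fma(d, h, s);
  d = fma(-s, s, sx);
  s = fma(d, h, s);

  /* Undo the input scale: sqrt(x * 2^256) == sqrt(x) * 2^128. */
  s = ldexp(s, scaled ? -128 : 0);

  /* v_cmp_class mask 0x260 selects -0, +0 and +inf; sqrt of those
     is the value itself, so return the (identically scaled) input. */
  return (sx == 0.0 || sx == INFINITY) ? sx : s;
}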

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=pitcairn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG %s
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG %s
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=pitcairn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL %s
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL %s
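; The expansion scales inputs below 2^-767 up by 2^256 (v_ldexp_f64), refines
; the v_rsq_f64 estimate with fma-based Newton-Raphson steps, scales the result
; back down by 2^-128, and uses v_cmp_class with mask 0x260 (-0, +0, +inf) to
; pass zero and infinity inputs through unchanged.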
define double @v_sqrt_f64(double %x) {
; SDAG-LABEL: v_sqrt_f64:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.sqrt.f64(double %x)
ret double %result
}
define double @v_sqrt_f64_fneg(double %x) {
; SDAG-LABEL: v_sqrt_f64_fneg:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 9
; SDAG-NEXT: v_cmp_lt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64_fneg:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -v[0:1], s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%x.neg = fneg double %x
%result = call double @llvm.sqrt.f64(double %x.neg)
ret double %result
}
define double @v_sqrt_f64_fabs(double %x) {
; SDAG-LABEL: v_sqrt_f64_fabs:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64_fabs:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%x.fabs = call double @llvm.fabs.f64(double %x)
%result = call double @llvm.sqrt.f64(double %x.fabs)
ret double %result
}
define double @v_sqrt_f64_fneg_fabs(double %x) {
; SDAG-LABEL: v_sqrt_f64_fneg_fabs:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 9
; SDAG-NEXT: v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64_fneg_fabs:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -|v[0:1]|, s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%x.fabs = call double @llvm.fabs.f64(double %x)
%x.fabs.neg = fneg double %x.fabs
%result = call double @llvm.sqrt.f64(double %x.fabs.neg)
ret double %result
}
define double @v_sqrt_f64_ninf(double %x) {
; SDAG-LABEL: v_sqrt_f64_ninf:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64_ninf:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call ninf double @llvm.sqrt.f64(double %x)
ret double %result
}
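; The flagged variants (ninf, nnan, nsz, afn) currently lower to the same
; sequence as the unflagged case.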
define double @v_sqrt_f64_no_infs_attribute(double %x) "no-infs-fp-math"="true" {
; SDAG-LABEL: v_sqrt_f64_no_infs_attribute:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64_no_infs_attribute:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call ninf double @llvm.sqrt.f64(double %x)
ret double %result
}
define double @v_sqrt_f64_nnan(double %x) {
; SDAG-LABEL: v_sqrt_f64_nnan:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64_nnan:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call nnan double @llvm.sqrt.f64(double %x)
ret double %result
}
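; The uniform-input (inreg) variants below still compute in VGPRs and move the
; result back to SGPRs with v_readfirstlane.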
define amdgpu_ps <2 x i32> @s_sqrt_f64(double inreg %x) {
; SDAG-LABEL: s_sqrt_f64:
; SDAG: ; %bb.0:
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_bfrev_b32_e32 v1, 8
; SDAG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; SDAG-NEXT: s_and_b64 s[0:1], vcc, exec
; SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0x260
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_readfirstlane_b32 s0, v0
; SDAG-NEXT: v_readfirstlane_b32 s1, v1
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: s_sqrt_f64:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_mov_b32 s2, 0
; GISEL-NEXT: s_brev_b32 s3, 8
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: v_mov_b32_e32 v1, s3
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GISEL-NEXT: ; return to shader part epilog
%result = call double @llvm.sqrt.f64(double %x)
%cast = bitcast double %result to <2 x i32>
%cast.0 = extractelement <2 x i32> %cast, i32 0
%cast.1 = extractelement <2 x i32> %cast, i32 1
%lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
%lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
%insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
%insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
ret <2 x i32> %insert.1
}
define amdgpu_ps <2 x i32> @s_sqrt_f64_ninf(double inreg %x) {
; SDAG-LABEL: s_sqrt_f64_ninf:
; SDAG: ; %bb.0:
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_bfrev_b32_e32 v1, 8
; SDAG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; SDAG-NEXT: s_and_b64 s[0:1], vcc, exec
; SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0x260
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_readfirstlane_b32 s0, v0
; SDAG-NEXT: v_readfirstlane_b32 s1, v1
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: s_sqrt_f64_ninf:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_mov_b32 s2, 0
; GISEL-NEXT: s_brev_b32 s3, 8
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: v_mov_b32_e32 v1, s3
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GISEL-NEXT: ; return to shader part epilog
%result = call ninf double @llvm.sqrt.f64(double %x)
%cast = bitcast double %result to <2 x i32>
%cast.0 = extractelement <2 x i32> %cast, i32 0
%cast.1 = extractelement <2 x i32> %cast, i32 1
%lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
%lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
%insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
%insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
ret <2 x i32> %insert.1
}
define amdgpu_ps <2 x i32> @s_sqrt_f64_afn(double inreg %x) {
; SDAG-LABEL: s_sqrt_f64_afn:
; SDAG: ; %bb.0:
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_bfrev_b32_e32 v1, 8
; SDAG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; SDAG-NEXT: s_and_b64 s[0:1], vcc, exec
; SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0x260
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_readfirstlane_b32 s0, v0
; SDAG-NEXT: v_readfirstlane_b32 s1, v1
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: s_sqrt_f64_afn:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_mov_b32 s2, 0
; GISEL-NEXT: s_brev_b32 s3, 8
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: v_mov_b32_e32 v1, s3
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GISEL-NEXT: ; return to shader part epilog
%result = call afn double @llvm.sqrt.f64(double %x)
%cast = bitcast double %result to <2 x i32>
%cast.0 = extractelement <2 x i32> %cast, i32 0
%cast.1 = extractelement <2 x i32> %cast, i32 1
%lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
%lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
%insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
%insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
ret <2 x i32> %insert.1
}
define amdgpu_ps <2 x i32> @s_sqrt_f64_afn_nnan_ninf(double inreg %x) {
; SDAG-LABEL: s_sqrt_f64_afn_nnan_ninf:
; SDAG: ; %bb.0:
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_bfrev_b32_e32 v1, 8
; SDAG-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; SDAG-NEXT: s_and_b64 s[0:1], vcc, exec
; SDAG-NEXT: s_cselect_b32 s0, 0xffffff80, 0
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0x260
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v4
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], s0
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_readfirstlane_b32 s0, v0
; SDAG-NEXT: v_readfirstlane_b32 s1, v1
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: s_sqrt_f64_afn_nnan_ninf:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_mov_b32 s2, 0
; GISEL-NEXT: s_brev_b32 s3, 8
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: v_mov_b32_e32 v1, s3
; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, s[0:1], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GISEL-NEXT: ; return to shader part epilog
%result = call afn nnan ninf double @llvm.sqrt.f64(double %x)
%cast = bitcast double %result to <2 x i32>
%cast.0 = extractelement <2 x i32> %cast, i32 0
%cast.1 = extractelement <2 x i32> %cast, i32 1
%lane.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.0)
%lane.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast.1)
%insert.0 = insertelement <2 x i32> poison, i32 %lane.0, i32 0
%insert.1 = insertelement <2 x i32> %insert.0, i32 %lane.1, i32 1
ret <2 x i32> %insert.1
}
define double @v_sqrt_f64_nsz(double %x) {
; SDAG-LABEL: v_sqrt_f64_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64_nsz:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call nsz double @llvm.sqrt.f64(double %x)
ret double %result
}
define double @v_sqrt_f64_nnan_ninf(double %x) {
; SDAG-LABEL: v_sqrt_f64_nnan_ninf:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64_nnan_ninf:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call nnan ninf double @llvm.sqrt.f64(double %x)
ret double %result
}
define double @v_sqrt_f64_nnan_ninf_nsz(double %x) {
; SDAG-LABEL: v_sqrt_f64_nnan_ninf_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64_nnan_ninf_nsz:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call nnan ninf nsz double @llvm.sqrt.f64(double %x)
ret double %result
}
define double @v_sqrt_f64_afn(double %x) {
; SDAG-LABEL: v_sqrt_f64_afn:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64_afn:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call afn double @llvm.sqrt.f64(double %x)
ret double %result
}
define double @v_sqrt_f64_afn_nsz(double %x) {
; SDAG-LABEL: v_sqrt_f64_afn_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64_afn_nsz:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call afn nsz double @llvm.sqrt.f64(double %x)
ret double %result
}
define <2 x double> @v_sqrt_v2f64_afn(<2 x double> %x) {
; SDAG-LABEL: v_sqrt_v2f64_afn:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
; SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
; SDAG-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
; SDAG-NEXT: v_mul_f64 v[8:9], v[0:1], v[4:5]
; SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
; SDAG-NEXT: v_mul_f64 v[10:11], v[2:3], v[6:7]
; SDAG-NEXT: v_mul_f64 v[6:7], v[6:7], 0.5
; SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
; SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
; SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
; SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
; SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
; SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
; SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
; SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
; SDAG-NEXT: v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
; SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
; SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
; SDAG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
; SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v9, 0x260
; SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
; SDAG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
; SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_v2f64_afn:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
; GISEL-NEXT: v_mul_f64 v[8:9], v[4:5], 0.5
; GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
; GISEL-NEXT: v_mul_f64 v[10:11], v[6:7], 0.5
; GISEL-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7]
; GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
; GISEL-NEXT: v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
; GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
; GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
; GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
; GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
; GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
; GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
; GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
; GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
; GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
; GISEL-NEXT: v_mov_b32_e32 v9, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call afn <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
ret <2 x double> %result
}
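; The <2 x double> cases scalarize into two interleaved copies of the scalar
; expansion.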
define double @v_sqrt_f64_afn_nnan(double %x) {
; SDAG-LABEL: v_sqrt_f64_afn_nnan:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64_afn_nnan:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call afn nnan double @llvm.sqrt.f64(double %x)
ret double %result
}
define double @v_sqrt_f64_fabs_afn_ninf(double %x) {
; SDAG-LABEL: v_sqrt_f64_fabs_afn_ninf:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64_fabs_afn_ninf:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%fabs = call double @llvm.fabs.f64(double %x)
%result = call afn ninf double @llvm.sqrt.f64(double %fabs)
ret double %result
}
define double @v_sqrt_f64_afn_nnan_ninf(double %x) {
; SDAG-LABEL: v_sqrt_f64_afn_nnan_ninf:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64_afn_nnan_ninf:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call afn nnan ninf double @llvm.sqrt.f64(double %x)
ret double %result
}
define <2 x double> @v_sqrt_v2f64_afn_nnan_ninf(<2 x double> %x) {
; SDAG-LABEL: v_sqrt_v2f64_afn_nnan_ninf:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
; SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
; SDAG-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
; SDAG-NEXT: v_mul_f64 v[8:9], v[0:1], v[4:5]
; SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
; SDAG-NEXT: v_mul_f64 v[10:11], v[2:3], v[6:7]
; SDAG-NEXT: v_mul_f64 v[6:7], v[6:7], 0.5
; SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
; SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
; SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
; SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
; SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
; SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
; SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
; SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
; SDAG-NEXT: v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
; SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
; SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
; SDAG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
; SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v9, 0x260
; SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
; SDAG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
; SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_v2f64_afn_nnan_ninf:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
; GISEL-NEXT: v_mul_f64 v[8:9], v[4:5], 0.5
; GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
; GISEL-NEXT: v_mul_f64 v[10:11], v[6:7], 0.5
; GISEL-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7]
; GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
; GISEL-NEXT: v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
; GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
; GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
; GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
; GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
; GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
; GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
; GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
; GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
; GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
; GISEL-NEXT: v_mov_b32_e32 v9, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call afn nnan ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
ret <2 x double> %result
}
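; Adding nsz on top of afn nnan ninf leaves the expansion unchanged.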
define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
; SDAG-LABEL: v_sqrt_f64_afn_nnan_ninf_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64_afn_nnan_ninf_nsz:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call afn nnan ninf nsz double @llvm.sqrt.f64(double %x)
ret double %result
}
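; "approx-func-fp-math"="true" as a function attribute (#2), with only nsz on
; the call itself, also keeps the full expansion.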
define double @v_sqrt_f64__approx_func_fp_math(double %x) #2 {
; SDAG-LABEL: v_sqrt_f64__approx_func_fp_math:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64__approx_func_fp_math:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call nsz double @llvm.sqrt.f64(double %x)
ret double %result
}
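; Attribute group #3 adds "no-nans-fp-math" and "no-infs-fp-math" on top of
; approx-func; the expansion is still unchanged.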
define double @v_sqrt_f64__enough_unsafe_attrs(double %x) #3 {
; SDAG-LABEL: v_sqrt_f64__enough_unsafe_attrs:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64__enough_unsafe_attrs:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call nsz double @llvm.sqrt.f64(double %x)
ret double %result
}
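; "unsafe-fp-math"="true" (#4) alone likewise does not enable a cheaper path.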
define double @v_sqrt_f64__unsafe_attr(double %x) #4 {
; SDAG-LABEL: v_sqrt_f64__unsafe_attr:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
; SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
; SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
; SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v5, 0x260
; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_f64__unsafe_attr:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call nsz double @llvm.sqrt.f64(double %x)
ret double %result
}
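; The plain case: no fast-math flags and no attributes on the 2 x double sqrt.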
define <2 x double> @v_sqrt_v2f64(<2 x double> %x) {
; SDAG-LABEL: v_sqrt_v2f64:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s4, 0
; SDAG-NEXT: s_brev_b32 s5, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
; SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
; SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
; SDAG-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; SDAG-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
; SDAG-NEXT: v_mul_f64 v[8:9], v[0:1], v[4:5]
; SDAG-NEXT: v_mul_f64 v[4:5], v[4:5], 0.5
; SDAG-NEXT: v_mul_f64 v[10:11], v[2:3], v[6:7]
; SDAG-NEXT: v_mul_f64 v[6:7], v[6:7], 0.5
; SDAG-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 0.5
; SDAG-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 0.5
; SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
; SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
; SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
; SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
; SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
; SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
; SDAG-NEXT: v_fma_f64 v[8:9], v[12:13], v[4:5], v[8:9]
; SDAG-NEXT: v_fma_f64 v[10:11], v[14:15], v[6:7], v[10:11]
; SDAG-NEXT: v_fma_f64 v[12:13], -v[8:9], v[8:9], v[0:1]
; SDAG-NEXT: v_fma_f64 v[14:15], -v[10:11], v[10:11], v[2:3]
; SDAG-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[8:9]
; SDAG-NEXT: v_mov_b32_e32 v8, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v9, 0x260
; SDAG-NEXT: v_fma_f64 v[6:7], v[14:15], v[6:7], v[10:11]
; SDAG-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
; SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_v2f64:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: s_brev_b32 s5, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
; GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[4:5], v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v4, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5]
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v5
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
; GISEL-NEXT: v_rsq_f64_e32 v[4:5], v[0:1]
; GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[2:3]
; GISEL-NEXT: v_mul_f64 v[8:9], v[4:5], 0.5
; GISEL-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5]
; GISEL-NEXT: v_mul_f64 v[10:11], v[6:7], 0.5
; GISEL-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7]
; GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[4:5], 0.5
; GISEL-NEXT: v_fma_f64 v[14:15], -v[10:11], v[6:7], 0.5
; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[4:5]
; GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
; GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[6:7]
; GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
; GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
; GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
; GISEL-NEXT: v_fma_f64 v[12:13], -v[4:5], v[4:5], v[0:1]
; GISEL-NEXT: v_fma_f64 v[14:15], -v[6:7], v[6:7], v[2:3]
; GISEL-NEXT: v_fma_f64 v[4:5], v[12:13], v[8:9], v[4:5]
; GISEL-NEXT: v_mov_b32_e32 v8, 0xffffff80
; GISEL-NEXT: v_fma_f64 v[6:7], v[14:15], v[10:11], v[6:7]
; GISEL-NEXT: v_mov_b32_e32 v9, 0x260
; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v8, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[4:5]
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v9
; GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v9
; GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v10
; GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v8
; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
ret <2 x double> %result
}
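; Three interleaved copies of the expansion; compares for the second and third
; elements go through s[4:5] and s[6:7] respectively.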
define <3 x double> @v_sqrt_v3f64(<3 x double> %x) {
; SDAG-LABEL: v_sqrt_v3f64:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: s_mov_b32 s6, 0
; SDAG-NEXT: s_brev_b32 s7, 8
; SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[6:7], v[0:1]
; SDAG-NEXT: v_cmp_gt_f64_e64 s[4:5], s[6:7], v[2:3]
; SDAG-NEXT: v_cmp_gt_f64_e64 s[6:7], s[6:7], v[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; SDAG-NEXT: v_lshlrev_b32_e32 v6, 8, v6
; SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v6
; SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5]
; SDAG-NEXT: v_lshlrev_b32_e32 v6, 8, v6
; SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v6
; SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[6:7]
; SDAG-NEXT: v_lshlrev_b32_e32 v6, 8, v6
; SDAG-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
; SDAG-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
; SDAG-NEXT: v_rsq_f64_e32 v[8:9], v[2:3]
; SDAG-NEXT: v_rsq_f64_e32 v[10:11], v[4:5]
; SDAG-NEXT: v_mul_f64 v[12:13], v[0:1], v[6:7]
; SDAG-NEXT: v_mul_f64 v[6:7], v[6:7], 0.5
; SDAG-NEXT: v_mul_f64 v[14:15], v[2:3], v[8:9]
; SDAG-NEXT: v_mul_f64 v[8:9], v[8:9], 0.5
; SDAG-NEXT: v_mul_f64 v[16:17], v[4:5], v[10:11]
; SDAG-NEXT: v_mul_f64 v[10:11], v[10:11], 0.5
; SDAG-NEXT: v_fma_f64 v[18:19], -v[6:7], v[12:13], 0.5
; SDAG-NEXT: v_fma_f64 v[20:21], -v[8:9], v[14:15], 0.5
; SDAG-NEXT: v_fma_f64 v[22:23], -v[10:11], v[16:17], 0.5
; SDAG-NEXT: v_fma_f64 v[12:13], v[12:13], v[18:19], v[12:13]
; SDAG-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
; SDAG-NEXT: v_fma_f64 v[14:15], v[14:15], v[20:21], v[14:15]
; SDAG-NEXT: v_fma_f64 v[8:9], v[8:9], v[20:21], v[8:9]
; SDAG-NEXT: v_fma_f64 v[16:17], v[16:17], v[22:23], v[16:17]
; SDAG-NEXT: v_fma_f64 v[10:11], v[10:11], v[22:23], v[10:11]
; SDAG-NEXT: v_fma_f64 v[18:19], -v[12:13], v[12:13], v[0:1]
; SDAG-NEXT: v_fma_f64 v[20:21], -v[14:15], v[14:15], v[2:3]
; SDAG-NEXT: v_fma_f64 v[22:23], -v[16:17], v[16:17], v[4:5]
; SDAG-NEXT: v_fma_f64 v[12:13], v[18:19], v[6:7], v[12:13]
; SDAG-NEXT: v_fma_f64 v[14:15], v[20:21], v[8:9], v[14:15]
; SDAG-NEXT: v_fma_f64 v[16:17], v[22:23], v[10:11], v[16:17]
; SDAG-NEXT: v_fma_f64 v[18:19], -v[12:13], v[12:13], v[0:1]
; SDAG-NEXT: v_fma_f64 v[20:21], -v[14:15], v[14:15], v[2:3]
; SDAG-NEXT: v_fma_f64 v[22:23], -v[16:17], v[16:17], v[4:5]
; SDAG-NEXT: v_fma_f64 v[6:7], v[18:19], v[6:7], v[12:13]
; SDAG-NEXT: v_mov_b32_e32 v12, 0xffffff80
; SDAG-NEXT: v_mov_b32_e32 v13, 0x260
; SDAG-NEXT: v_fma_f64 v[8:9], v[20:21], v[8:9], v[14:15]
; SDAG-NEXT: v_cndmask_b32_e32 v14, 0, v12, vcc
; SDAG-NEXT: v_cndmask_b32_e64 v15, 0, v12, s[4:5]
; SDAG-NEXT: v_fma_f64 v[10:11], v[22:23], v[10:11], v[16:17]
; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[6:7]
; SDAG-NEXT: v_ldexp_f64 v[6:7], v[6:7], v14
; SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
; SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v13
; SDAG-NEXT: v_ldexp_f64 v[8:9], v[8:9], v15
; SDAG-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], v13
; SDAG-NEXT: v_ldexp_f64 v[10:11], v[10:11], v12
; SDAG-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
; SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; SDAG-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[6:7]
; SDAG-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[6:7]
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_sqrt_v3f64:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s6, 0
; GISEL-NEXT: s_brev_b32 s7, 8
; GISEL-NEXT: v_cmp_gt_f64_e32 vcc, s[6:7], v[0:1]
; GISEL-NEXT: v_cmp_gt_f64_e64 s[4:5], s[6:7], v[2:3]
; GISEL-NEXT: v_cmp_gt_f64_e64 s[6:7], s[6:7], v[4:5]
; GISEL-NEXT: v_mov_b32_e32 v6, 0x100
; GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc
; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v7
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, v6, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[6:7]
; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v7
; GISEL-NEXT: v_ldexp_f64 v[4:5], v[4:5], v6
; GISEL-NEXT: v_rsq_f64_e32 v[6:7], v[0:1]
; GISEL-NEXT: v_rsq_f64_e32 v[8:9], v[2:3]
; GISEL-NEXT: v_rsq_f64_e32 v[10:11], v[4:5]
; GISEL-NEXT: v_mul_f64 v[12:13], v[6:7], 0.5
; GISEL-NEXT: v_mul_f64 v[6:7], v[0:1], v[6:7]
; GISEL-NEXT: v_mul_f64 v[14:15], v[8:9], 0.5
; GISEL-NEXT: v_mul_f64 v[8:9], v[2:3], v[8:9]
; GISEL-NEXT: v_mul_f64 v[16:17], v[10:11], 0.5
; GISEL-NEXT: v_mul_f64 v[10:11], v[4:5], v[10:11]
; GISEL-NEXT: v_fma_f64 v[18:19], -v[12:13], v[6:7], 0.5
; GISEL-NEXT: v_fma_f64 v[20:21], -v[14:15], v[8:9], 0.5
; GISEL-NEXT: v_fma_f64 v[22:23], -v[16:17], v[10:11], 0.5
; GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7]
; GISEL-NEXT: v_fma_f64 v[12:13], v[12:13], v[18:19], v[12:13]
; GISEL-NEXT: v_fma_f64 v[8:9], v[8:9], v[20:21], v[8:9]
; GISEL-NEXT: v_fma_f64 v[14:15], v[14:15], v[20:21], v[14:15]
; GISEL-NEXT: v_fma_f64 v[10:11], v[10:11], v[22:23], v[10:11]
; GISEL-NEXT: v_fma_f64 v[16:17], v[16:17], v[22:23], v[16:17]
; GISEL-NEXT: v_fma_f64 v[18:19], -v[6:7], v[6:7], v[0:1]
; GISEL-NEXT: v_fma_f64 v[20:21], -v[8:9], v[8:9], v[2:3]
; GISEL-NEXT: v_fma_f64 v[22:23], -v[10:11], v[10:11], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], v[18:19], v[12:13], v[6:7]
; GISEL-NEXT: v_fma_f64 v[8:9], v[20:21], v[14:15], v[8:9]
; GISEL-NEXT: v_fma_f64 v[10:11], v[22:23], v[16:17], v[10:11]
; GISEL-NEXT: v_fma_f64 v[18:19], -v[6:7], v[6:7], v[0:1]
; GISEL-NEXT: v_fma_f64 v[20:21], -v[8:9], v[8:9], v[2:3]
; GISEL-NEXT: v_fma_f64 v[22:23], -v[10:11], v[10:11], v[4:5]
; GISEL-NEXT: v_fma_f64 v[6:7], v[18:19], v[12:13], v[6:7]
; GISEL-NEXT: v_mov_b32_e32 v12, 0xffffff80
; GISEL-NEXT: v_mov_b32_e32 v13, 0x260
; GISEL-NEXT: v_fma_f64 v[8:9], v[20:21], v[14:15], v[8:9]
; GISEL-NEXT: v_cndmask_b32_e32 v14, 0, v12, vcc
; GISEL-NEXT: v_fma_f64 v[10:11], v[22:23], v[16:17], v[10:11]
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, v12, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[6:7]
; GISEL-NEXT: v_ldexp_f64 v[6:7], v[6:7], v14
; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v13
; GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v13
; GISEL-NEXT: v_ldexp_f64 v[8:9], v[8:9], v15
; GISEL-NEXT: v_cmp_class_f64_e64 s[6:7], v[4:5], v13
; GISEL-NEXT: v_ldexp_f64 v[10:11], v[10:11], v12
; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v4, v10, v4, s[6:7]
; GISEL-NEXT: v_cndmask_b32_e64 v5, v11, v5, s[6:7]
; GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call <3 x double> @llvm.sqrt.v3f64(<3 x double> %x)
ret <3 x double> %result
}
declare double @llvm.fabs.f64(double) #0
declare double @llvm.sqrt.f64(double) #0
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0
declare <3 x double> @llvm.sqrt.v3f64(<3 x double>) #0
declare i32 @llvm.amdgcn.readfirstlane(i32) #1
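; Attribute groups #2-#4 exercise the function-attribute equivalents of the
; per-call fast-math flags tested above.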
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #1 = { convergent nounwind willreturn memory(none) }
attributes #2 = { "approx-func-fp-math"="true" }
attributes #3 = { "approx-func-fp-math"="true" "no-nans-fp-math"="true" "no-infs-fp-math"="true" }
attributes #4 = { "unsafe-fp-math"="true" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}