Files
clang-p2996/libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl
Wenju He 338dee0742 [NFC][libclc] Refactor _CLC_*_VECTORIZE macros to functions in .inc files (#145678)
With this PR, if we have customized implementation for scalar or vector
length = 2, we don't need to write new macros, e.g.
https://github.com/intel/llvm/blob/fb18321705f6/libclc/clc/include/clc/clcmacro.h#L15

Undef __HALF_ONLY, __FLOAT_ONLY and __DOUBLE_ONLY at the end of
clc/include/clc/math/gentype.inc

llvm-diff shows no change to nvptx64--nvidiacl.bc and amdgcn--amdhsa.bc
2025-06-30 17:19:19 +08:00

52 lines
1.4 KiB
Common Lisp

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/clcmacro.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_fma.h>
#include <clc/math/clc_ldexp.h>
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#ifdef __AMDGCN__
#define __clc_builtin_rsq __builtin_amdgcn_rsq
#else
#define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee
#endif
_CLC_OVERLOAD _CLC_DEF double __clc_sqrt(double x) {
uint vcc = x < 0x1p-767;
uint exp0 = vcc ? 0x100 : 0;
unsigned exp1 = vcc ? 0xffffff80 : 0;
double v01 = __clc_ldexp(x, exp0);
double v23 = __clc_builtin_rsq(v01);
double v45 = v01 * v23;
v23 = v23 * 0.5;
double v67 = __clc_fma(-v23, v45, 0.5);
v45 = __clc_fma(v45, v67, v45);
double v89 = __clc_fma(-v45, v45, v01);
v23 = __clc_fma(v23, v67, v23);
v45 = __clc_fma(v89, v23, v45);
v67 = __clc_fma(-v45, v45, v01);
v23 = __clc_fma(v67, v23, v45);
v23 = __clc_ldexp(v23, exp1);
return (x == __builtin_inf() || (x == 0.0)) ? v01 : v23;
}
#define __DOUBLE_ONLY
#define FUNCTION __clc_sqrt
#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
#include <clc/math/gentype.inc>
#endif