This commit moves the remaining FP64 sin and cos helper functions to the CLC library. As a consequence, it formally moves all sin, cos and sincos builtins to the CLC library. Previously, the FP16 and FP32 builtins were nominally there but still lived in the OpenCL layer while waiting for the FP64 ones. The FP64 builtins are now vectorized, as the FP16 and FP32 ones were earlier. One helper table had to be changed. It was previously a table of bytes that each work-item loaded as a uint4. Since this doesn't vectorize well, the table was split so that each work-item loads two ulongNs. While this might not be as efficient on some devices, one mitigating factor is that we were previously loading 48 bytes per work-item in total, but only using 40 of them. With this commit we only load the bytes we need.
64 lines
1.9 KiB
C++
64 lines
1.9 KiB
C++
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#if __CLC_FPSIZE == 32
|
|
|
|
// Single-precision cosine.
//
// Reduces |x| with __clc_argReductionS, evaluates -__clc_sinf_piby4 and
// __clc_cosf_piby4 on the reduced argument, picks one of the two from the low
// bit of the index returned by the reduction, and toggles the sign when that
// index is greater than 1. NaN and infinite inputs produce NaN.
_CLC_OVERLOAD _CLC_DEF __CLC_FLOATN __clc_cos(__CLC_FLOATN x) {
  __CLC_FLOATN magnitude = __clc_fabs(x);

  // Both outputs are written by the reduction helper. Presumably they are the
  // leading and trailing parts of the reduced argument - confirm against
  // __clc_argReductionS.
  __CLC_FLOATN red0, red1;
  __CLC_INTN region = __clc_argReductionS(&red0, &red1, magnitude);

  __CLC_FLOATN neg_sin = -__clc_sinf_piby4(red0, red1);
  __CLC_FLOATN pos_cos = __clc_cosf_piby4(red0, red1);

  // Odd index -> negated sine, even index -> cosine.
  __CLC_FLOATN result = (region & 1) != 0 ? neg_sin : pos_cos;

  // Toggle the sign bit (bit 31) wherever the index is greater than 1.
  __CLC_INTN sign_toggle = (region > 1) << 31;
  result = __CLC_AS_FLOATN(__CLC_AS_INTN(result) ^ sign_toggle);

  // NaN and infinite inputs override whatever was computed above.
  __CLC_INTN not_finite = __clc_isnan(x) || __clc_isinf(x);
  return __clc_select(result, __CLC_GENTYPE_NAN, not_finite);
}
|
|
|
|
#elif __CLC_FPSIZE == 16
|
|
|
|
// Half-precision cosine: convert the argument with __CLC_CONVERT_FLOATN, call
// the __clc_cos overload for that type, and convert the result back to the
// generic type.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cos(__CLC_GENTYPE x) {
  __CLC_FLOATN widened = __CLC_CONVERT_FLOATN(x);
  __CLC_FLOATN cos_widened = __clc_cos(widened);
  return __CLC_CONVERT_GENTYPE(cos_widened);
}
|
|
|
|
#elif __CLC_FPSIZE == 64
|
|
|
|
// Double-precision cosine.
//
// Both argument reductions (__clc_remainder_piby2_medium and
// __clc_remainder_piby2_large) are evaluated unconditionally on |x|; the
// result of one of them is then selected per element depending on whether
// |x| < 2^47. __clc_sincos_piby4 is evaluated on the selected reduced
// argument, the low bit of the selected index chooses between the negated
// sine and the cosine, and the sign is toggled when the index is greater
// than 1. NaN and infinite inputs produce NaN.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cos(__CLC_GENTYPE x) {
  __CLC_GENTYPE absx = __clc_fabs(x);

  // Elements below 2^47 use the outputs of the "medium" reduction.
  __CLC_BIT_INTN is_medium = absx < 0x1.0p+47;

  // Outputs of the two reductions (_m: medium, _l: large). r/rr are presumably
  // the leading and trailing parts of the reduced argument - confirm against
  // the helpers' definitions.
  __CLC_INTN regn_m, regn_l;
  __CLC_GENTYPE r_m, r_l, rr_m, rr_l;

  // The last argument is the address of the index output.
  __clc_remainder_piby2_medium(absx, &r_m, &rr_m, &regn_m);
  __clc_remainder_piby2_large(absx, &r_l, &rr_l, &regn_l);

  __CLC_GENTYPE r = is_medium ? r_m : r_l;
  __CLC_GENTYPE rr = is_medium ? rr_m : rr_l;
  // is_medium is converted to the type of the index before selecting.
  __CLC_INTN regn = __CLC_CONVERT_INTN(is_medium) ? regn_m : regn_l;

  __CLC_GENTYPE sinval, cosval;
  __clc_sincos_piby4(r, rr, &sinval, &cosval);
  sinval = -sinval;

  // Odd index -> negated sine, even index -> cosine; kept as raw bits so the
  // sign can be toggled with an integer XOR below.
  __CLC_LONGN c =
      __CLC_AS_LONGN(__CLC_CONVERT_BIT_INTN((regn & 1) != 0) ? sinval : cosval);
  // Toggle the sign bit (bit 63) wherever the index is greater than 1.
  c ^= __CLC_CONVERT_BIT_INTN(regn > 1) << 63;

  // NaN and infinite inputs override whatever was computed above.
  return (__clc_isnan(absx) | __clc_isinf(absx)) ? __CLC_GENTYPE_NAN
                                                 : __CLC_AS_GENTYPE(c);
}
|
|
|
|
#endif
|