diff --git a/libclc/clc/include/clc/math/clc_ilogb.h b/libclc/clc/include/clc/math/clc_ilogb.h new file mode 100644 index 000000000000..6aeaf6be3ac6 --- /dev/null +++ b/libclc/clc/include/clc/math/clc_ilogb.h @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_ILOGB_H__ +#define __CLC_MATH_CLC_ILOGB_H__ + +#define __CLC_FUNCTION __clc_ilogb +#define __CLC_BODY +#include + +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_ILOGB_H__ diff --git a/libclc/clc/include/clc/math/clc_logb.h b/libclc/clc/include/clc/math/clc_logb.h new file mode 100644 index 000000000000..75a2ce7e578b --- /dev/null +++ b/libclc/clc/include/clc/math/clc_logb.h @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MATH_CLC_LOGB_H__ +#define __CLC_MATH_CLC_LOGB_H__ + +#define __CLC_FUNCTION __clc_logb +#define __CLC_BODY +#include + +#undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_LOGB_H__ diff --git a/libclc/clc/include/clc/math/math.h b/libclc/clc/include/clc/math/math.h index d63c5a8d7bec..5df53f1b5e5d 100644 --- a/libclc/clc/include/clc/math/math.h +++ b/libclc/clc/include/clc/math/math.h @@ -52,6 +52,7 @@ bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void); #define INDEFBITPATT_SP32 0xffc00000 #define PINFBITPATT_SP32 0x7f800000 #define NINFBITPATT_SP32 0xff800000 +#define NUMEXPBITS_SP32 8 #define EXPBIAS_SP32 127 #define EXPSHIFTBITS_SP32 23 #define BIASEDEMIN_SP32 1 @@ -62,6 +63,8 @@ bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void); #define MANTLENGTH_SP32 24 #define BASEDIGITS_SP32 7 +#define LOG_MAGIC_NUM_SP32 (1 + NUMEXPBITS_SP32 - EXPBIAS_SP32) + _CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) { int ix = __clc_as_int(x); if (!__clc_fp32_subnormals_supported() && ((ix & EXPBITS_SP32) == 0) && @@ -86,6 +89,7 @@ _CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) { #define INDEFBITPATT_DP64 0xfff8000000000000L #define PINFBITPATT_DP64 0x7ff0000000000000L #define NINFBITPATT_DP64 0xfff0000000000000L +#define NUMEXPBITS_DP64 11 #define EXPBIAS_DP64 1023 #define EXPSHIFTBITS_DP64 52 #define BIASEDEMIN_DP64 1 @@ -96,8 +100,26 @@ _CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) { #define MANTLENGTH_DP64 53 #define BASEDIGITS_DP64 15 +#define LOG_MAGIC_NUM_DP64 (1 + NUMEXPBITS_DP64 - EXPBIAS_DP64) + #endif // cl_khr_fp64 +#ifdef cl_khr_fp16 + +#define SIGNBIT_FP16 0x8000 +#define EXSIGNBIT_FP16 0x7fff +#define EXPBITS_FP16 0x7c00 +#define MANTBITS_FP16 0x03ff +#define PINFBITPATT_FP16 0x7c00 +#define NINFBITPATT_FP16 0xfc00 +#define NUMEXPBITS_FP16 5 +#define EXPBIAS_FP16 15 +#define EXPSHIFTBITS_FP16 10 + +#define LOG_MAGIC_NUM_FP16 (1 + NUMEXPBITS_FP16 - EXPBIAS_FP16) + +#endif // cl_khr_fp16 + #define ALIGNED(x) __attribute__((aligned(x))) #endif // __CLC_MATH_MATH_H__ diff --git a/libclc/generic/include/clc/math/ilogb.inc b/libclc/clc/include/clc/math/unary_decl_with_int_return.inc similarity index 84% rename from libclc/generic/include/clc/math/ilogb.inc rename to libclc/clc/include/clc/math/unary_decl_with_int_return.inc index 7c6293bb22e4..333a752ced39 100644 --- a/libclc/generic/include/clc/math/ilogb.inc +++ b/libclc/clc/include/clc/math/unary_decl_with_int_return.inc @@ -6,4 +6,4 @@ // //===----------------------------------------------------------------------===// -_CLC_OVERLOAD _CLC_DECL __CLC_INTN ilogb(__CLC_GENTYPE x); +_CLC_OVERLOAD _CLC_DECL __CLC_INTN __CLC_FUNCTION(__CLC_GENTYPE x); diff --git a/libclc/clc/include/clc/math/unary_def_with_int_return.inc b/libclc/clc/include/clc/math/unary_def_with_int_return.inc new file mode 100644 index 000000000000..33be58bf4584 --- /dev/null +++ b/libclc/clc/include/clc/math/unary_def_with_int_return.inc @@ -0,0 +1,17 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#ifndef __CLC_FUNCTION +#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x) +#endif + +_CLC_OVERLOAD _CLC_DEF __CLC_INTN FUNCTION(__CLC_GENTYPE a) { + return __CLC_FUNCTION(FUNCTION)(a); +} diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index 16f1c0fc6998..a082dc6f1fa0 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -69,6 +69,7 @@ math/clc_half_sin.cl math/clc_half_sqrt.cl math/clc_half_tan.cl math/clc_hypot.cl +math/clc_ilogb.cl math/clc_ldexp.cl math/clc_lgamma.cl math/clc_lgamma_r.cl @@ -76,6 +77,7 @@ math/clc_log.cl math/clc_log10.cl math/clc_log1p.cl math/clc_log2.cl +math/clc_logb.cl math/clc_mad.cl math/clc_maxmag.cl math/clc_minmag.cl diff --git a/libclc/clc/lib/generic/math/clc_ilogb.cl b/libclc/clc/lib/generic/math/clc_ilogb.cl new file mode 100644 index 000000000000..c33ed9fe9b04 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_ilogb.cl @@ -0,0 +1,17 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include + +#define __CLC_BODY +#include diff --git a/libclc/clc/lib/generic/math/clc_ilogb.inc b/libclc/clc/lib/generic/math/clc_ilogb.inc new file mode 100644 index 000000000000..acbc70a9d1be --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_ilogb.inc @@ -0,0 +1,81 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __CLC_FPSIZE == 32 + +_CLC_OVERLOAD _CLC_DEF __CLC_INTN __clc_ilogb(__CLC_GENTYPE x) { + __CLC_UINTN ux = __CLC_AS_UINTN(x); + __CLC_UINTN ax = ux & EXSIGNBIT_SP32; + __CLC_INTN rs = (__CLC_INTN)LOG_MAGIC_NUM_SP32 - + __CLC_AS_INTN(__clc_clz(ux & MANTBITS_SP32)); + __CLC_INTN r = __CLC_AS_INTN(ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; + r = ax < 0x00800000U ? rs : r; + r = ax == 0 ? FP_ILOGB0 : r; + + // We could merge those 2 tests and have: + // + // r = ax >= EXPBITS_SP32 ? 0x7fffffff : r + // + // since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and + // FP_ILOGBNAN can change without requiring changes to __clc_ilogb() code. + r = ax > EXPBITS_SP32 ? FP_ILOGBNAN : r; + r = ax == EXPBITS_SP32 ? 0x7fffffff : r; + return r; +} + +#endif + +#if __CLC_FPSIZE == 64 + +_CLC_OVERLOAD _CLC_DEF __CLC_INTN __clc_ilogb(__CLC_GENTYPE x) { + __CLC_ULONGN ux = __CLC_AS_ULONGN(x); + __CLC_ULONGN ax = ux & ~SIGNBIT_DP64; + __CLC_INTN rs = (__CLC_INTN)LOG_MAGIC_NUM_DP64 - + __CLC_CONVERT_INTN(__clc_clz(ax & MANTBITS_DP64)); + __CLC_INTN r = __CLC_CONVERT_INTN(ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; + r = __CLC_CONVERT_INTN(ax < 0x0010000000000000UL) ? rs : r; + r = __CLC_CONVERT_INTN(ax == 0UL) ? (__CLC_INTN)FP_ILOGB0 : r; + + // We could merge those 2 tests and have: + // + // r = ax >= 0x7ff0000000000000UL ? 0x7fffffff : r + // + // since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and + // FP_ILOGBNAN can change without requiring changes to __clc_ilogb() code. + r = __CLC_CONVERT_INTN(ax > 0x7ff0000000000000UL) ? FP_ILOGBNAN : r; + r = __CLC_CONVERT_INTN(ax == 0x7ff0000000000000UL) ? 0x7fffffff : r; + return r; +} + +#endif + +#if __CLC_FPSIZE == 16 + +_CLC_OVERLOAD _CLC_DEF __CLC_INTN __clc_ilogb(__CLC_GENTYPE x) { + __CLC_USHORTN ux = __CLC_AS_USHORTN(x); + __CLC_USHORTN ax = ux & (__CLC_USHORTN)EXSIGNBIT_FP16; + __CLC_USHORTN mantx = ux & (__CLC_USHORTN)MANTBITS_FP16; + __CLC_INTN rs = + (__CLC_INTN)LOG_MAGIC_NUM_FP16 - __CLC_CONVERT_INTN(__clc_clz(mantx)); + __CLC_INTN r = + __CLC_CONVERT_INTN(ax >> (__CLC_USHORTN)EXPSHIFTBITS_FP16) - EXPBIAS_FP16; + r = __CLC_CONVERT_INTN(ax < (__CLC_USHORTN)0x0400U) ? rs : r; + r = __CLC_CONVERT_INTN(ax == (__CLC_USHORTN)0) ? (__CLC_INTN)FP_ILOGB0 : r; + + // We could merge those 2 tests and have: + // + // r = ax >= EXPBITS_FP16 ? 0x7fffffff : r + // + // since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and + // FP_ILOGBNAN can change without requiring changes to __clc_ilogb() code. + r = __CLC_CONVERT_INTN(ax > (__CLC_USHORTN)EXPBITS_FP16) ? FP_ILOGBNAN : r; + r = __CLC_CONVERT_INTN(ax == (__CLC_USHORTN)EXPBITS_FP16) ? 0x7fffffff : r; + return r; +} + +#endif diff --git a/libclc/clc/lib/generic/math/clc_logb.cl b/libclc/clc/lib/generic/math/clc_logb.cl new file mode 100644 index 000000000000..f571a11d0c38 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_logb.cl @@ -0,0 +1,17 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include + +#define __CLC_BODY +#include diff --git a/libclc/clc/lib/generic/math/clc_logb.inc b/libclc/clc/lib/generic/math/clc_logb.inc new file mode 100644 index 000000000000..dde394886de2 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_logb.inc @@ -0,0 +1,54 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __CLC_FPSIZE == 32 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_logb(__CLC_GENTYPE x) { + __CLC_INTN ax = __CLC_AS_INTN(x) & EXSIGNBIT_SP32; + __CLC_GENTYPE s = __CLC_CONVERT_GENTYPE(LOG_MAGIC_NUM_SP32 - __clc_clz(ax)); + __CLC_GENTYPE r = + __CLC_CONVERT_GENTYPE((ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32); + r = ax >= PINFBITPATT_SP32 ? __CLC_AS_GENTYPE(ax) : r; + r = ax < 0x00800000 ? s : r; + r = ax == 0 ? __CLC_AS_GENTYPE((__CLC_INTN)NINFBITPATT_SP32) : r; + return r; +} + +#endif + +#if __CLC_FPSIZE == 64 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_logb(__CLC_GENTYPE x) { + __CLC_LONGN ax = __CLC_AS_LONGN(x) & EXSIGNBIT_DP64; + __CLC_GENTYPE s = __CLC_CONVERT_GENTYPE(LOG_MAGIC_NUM_DP64 - __clc_clz(ax)); + __CLC_GENTYPE r = + __CLC_CONVERT_GENTYPE((ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64); + r = ax >= PINFBITPATT_DP64 ? __CLC_AS_GENTYPE(ax) : r; + r = ax < 0x0010000000000000L ? s : r; + r = ax == 0L ? __CLC_AS_GENTYPE((__CLC_LONGN)NINFBITPATT_DP64) : r; + return r; +} + +#endif + +#if __CLC_FPSIZE == 16 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_logb(__CLC_GENTYPE x) { + __CLC_SHORTN ax = __CLC_AS_SHORTN(x) & (__CLC_SHORTN)EXSIGNBIT_FP16; + __CLC_GENTYPE s = __CLC_CONVERT_GENTYPE((__CLC_SHORTN)LOG_MAGIC_NUM_FP16 - + (__CLC_SHORTN)__clc_clz(ax)); + __CLC_GENTYPE r = __CLC_CONVERT_GENTYPE( + (ax >> (__CLC_SHORTN)EXPSHIFTBITS_FP16) - (__CLC_SHORTN)EXPBIAS_FP16); + r = ax >= (__CLC_SHORTN)PINFBITPATT_FP16 ? __CLC_AS_GENTYPE(ax) : r; + r = ax < (__CLC_SHORTN)0x0400 ? s : r; + r = ax == (__CLC_SHORTN)0 ? __CLC_AS_GENTYPE((__CLC_SHORTN)NINFBITPATT_FP16) + : r; + return r; +} + +#endif diff --git a/libclc/generic/include/clc/math/ilogb.h b/libclc/generic/include/clc/math/ilogb.h index dd38c9df3686..45d8ea064b46 100644 --- a/libclc/generic/include/clc/math/ilogb.h +++ b/libclc/generic/include/clc/math/ilogb.h @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// -#define __CLC_BODY +#define __CLC_FUNCTION ilogb +#define __CLC_BODY #include + +#undef __CLC_FUNCTION diff --git a/libclc/generic/lib/math/ilogb.cl b/libclc/generic/lib/math/ilogb.cl index 42097ea09da0..9e3795a524b2 100644 --- a/libclc/generic/lib/math/ilogb.cl +++ b/libclc/generic/lib/math/ilogb.cl @@ -7,64 +7,8 @@ //===----------------------------------------------------------------------===// #include -#include -#include +#include -_CLC_OVERLOAD _CLC_DEF int ilogb(float x) { - uint ux = as_uint(x); - uint ax = ux & EXSIGNBIT_SP32; - int rs = -118 - (int) clz(ux & MANTBITS_SP32); - int r = (int) (ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; - r = ax < 0x00800000U ? rs : r; - r = ax == 0 ? FP_ILOGB0 : r; - - // We could merge those 2 tests and have: - // - // r = ax >= EXPBITS_SP32 ? 0x7fffffff : r - // - // since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and - // FP_ILOGBNAN can change without requiring changes to ilogb() code. - r = ax > EXPBITS_SP32 ? FP_ILOGBNAN : r; - r = ax == EXPBITS_SP32 ? 0x7fffffff : r; - return r; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, float); - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF int ilogb(double x) { - ulong ux = as_ulong(x); - ulong ax = ux & ~SIGNBIT_DP64; - int r = (int) (ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; - int rs = -1011 - (int) clz(ax & MANTBITS_DP64); - r = ax < 0x0010000000000000UL ? rs : r; - r = ax == 0UL ? FP_ILOGB0 : r; - - // We could merge those 2 tests and have: - // - // r = ax >= 0x7ff0000000000000UL ? 0x7fffffff : r - // - // since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and - // FP_ILOGBNAN can change without requiring changes to ilogb() code. - r = ax > 0x7ff0000000000000UL ? FP_ILOGBNAN : r; - r = ax == 0x7ff0000000000000UL ? 0x7fffffff : r; - return r; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, double); - -#endif // cl_khr_fp64 - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_OVERLOAD _CLC_DEF int ilogb(half x) { - return ilogb((float)x); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, half); - -#endif +#define FUNCTION ilogb +#define __CLC_BODY +#include diff --git a/libclc/generic/lib/math/logb.cl b/libclc/generic/lib/math/logb.cl index 462355033f91..25e676ef98fb 100644 --- a/libclc/generic/lib/math/logb.cl +++ b/libclc/generic/lib/math/logb.cl @@ -7,41 +7,8 @@ //===----------------------------------------------------------------------===// #include -#include -#include +#include -_CLC_OVERLOAD _CLC_DEF float logb(float x) { - int ax = as_int(x) & EXSIGNBIT_SP32; - float s = -118 - clz(ax); - float r = (ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; - r = ax >= PINFBITPATT_SP32 ? as_float(ax) : r; - r = ax < 0x00800000 ? s : r; - r = ax == 0 ? as_float(NINFBITPATT_SP32) : r; - return r; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, logb, float); - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double logb(double x) { - long ax = as_long(x) & EXSIGNBIT_DP64; - double s = -1011L - clz(ax); - double r = (int) (ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; - r = ax >= PINFBITPATT_DP64 ? as_double(ax) : r; - r = ax < 0x0010000000000000L ? s : r; - r = ax == 0L ? as_double(NINFBITPATT_DP64) : r; - return r; -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, logb, double) -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN_FP16(logb) - -#endif +#define FUNCTION logb +#define __CLC_BODY +#include