diff --git a/libclc/clc/include/clc/geometric/clc_fast_normalize.h b/libclc/clc/include/clc/geometric/clc_fast_normalize.h new file mode 100644 index 000000000000..66eed8b83ab1 --- /dev/null +++ b/libclc/clc/include/clc/geometric/clc_fast_normalize.h @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__ +#define __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__ + +#define __FLOAT_ONLY +#define __CLC_GEOMETRIC_RET_GENTYPE +#define __CLC_FUNCTION __clc_fast_normalize +#define __CLC_BODY +#include + +#undef __CLC_FUNCTION +#undef __CLC_GEOMETRIC_RET_GENTYPE +#undef __FLOAT_ONLY + +#endif // __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__ diff --git a/libclc/clc/include/clc/geometric/clc_normalize.h b/libclc/clc/include/clc/geometric/clc_normalize.h new file mode 100644 index 000000000000..3058a72b2bbb --- /dev/null +++ b/libclc/clc/include/clc/geometric/clc_normalize.h @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_GEOMETRIC_CLC_NORMALIZE_H__ +#define __CLC_GEOMETRIC_CLC_NORMALIZE_H__ + +#define __CLC_GEOMETRIC_RET_GENTYPE +#define __CLC_FUNCTION __clc_normalize +#define __CLC_BODY +#include + +#undef __CLC_FUNCTION +#undef __CLC_GEOMETRIC_RET_GENTYPE + +#endif // __CLC_GEOMETRIC_CLC_NORMALIZE_H__ diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index 0b5a805cfc33..d285bbba3dd2 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -9,7 +9,9 @@ geometric/clc_distance.cl geometric/clc_dot.cl geometric/clc_fast_distance.cl geometric/clc_fast_length.cl +geometric/clc_fast_normalize.cl geometric/clc_length.cl +geometric/clc_normalize.cl integer/clc_abs.cl integer/clc_abs_diff.cl integer/clc_add_sat.cl diff --git a/libclc/clc/lib/generic/geometric/clc_fast_normalize.cl b/libclc/clc/lib/generic/geometric/clc_fast_normalize.cl new file mode 100644 index 000000000000..85684d0f49bc --- /dev/null +++ b/libclc/clc/lib/generic/geometric/clc_fast_normalize.cl @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include
+#include
+#include
+
+#define __FLOAT_ONLY
+#define __CLC_BODY
+#include
diff --git a/libclc/clc/lib/generic/geometric/clc_fast_normalize.inc b/libclc/clc/lib/generic/geometric/clc_fast_normalize.inc
new file mode 100644
index 000000000000..e4c3ab2c5a65
--- /dev/null
+++ b/libclc/clc/lib/generic/geometric/clc_fast_normalize.inc
@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_VECSIZE_OR_1 == 1
+
+// Scalar overload: forwards directly to __clc_normalize.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_fast_normalize(__CLC_GENTYPE p) {
+  return __clc_normalize(p);
+}
+
+#elif (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 3 ||                   \
+       __CLC_VECSIZE_OR_1 == 4)
+
+// Vector overload (2, 3 or 4 components): scales p by
+// __clc_half_rsqrt(dot(p, p)). A squared length comparing equal to zero
+// returns p unchanged instead of being passed to __clc_half_rsqrt.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_fast_normalize(__CLC_GENTYPE p) {
+  __CLC_SCALAR_GENTYPE len_sq = __clc_dot(p, p);
+  if (len_sq == 0.0f) {
+    return p;
+  }
+  return p * __clc_half_rsqrt(len_sq);
+}
+
+#endif
diff --git a/libclc/clc/lib/generic/geometric/clc_normalize.cl b/libclc/clc/lib/generic/geometric/clc_normalize.cl
new file mode 100644
index 000000000000..ec4a67647c13
--- /dev/null
+++ b/libclc/clc/lib/generic/geometric/clc_normalize.cl
@@ -0,0 +1,22 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define __CLC_BODY
+#include
diff --git a/libclc/clc/lib/generic/geometric/clc_normalize.inc b/libclc/clc/lib/generic/geometric/clc_normalize.inc
new file mode 100644
index 000000000000..8a47c6d4826e
--- /dev/null
+++ b/libclc/clc/lib/generic/geometric/clc_normalize.inc
@@ -0,0 +1,99 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Geometric functions are only provided for scalars and 2-, 3- and 4-component
+// vectors; wider gentypes produce no code here.
+#if (__CLC_VECSIZE_OR_1 == 1 || __CLC_VECSIZE_OR_1 == 2 ||                     \
+     __CLC_VECSIZE_OR_1 == 3 || __CLC_VECSIZE_OR_1 == 4)
+
+// Half precision: there is no native FP16 implementation yet, so convert to
+// float, normalize there, and convert the result back.
+#if __CLC_FPSIZE == 16
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
+  return __CLC_CONVERT_GENTYPE(__clc_normalize(__CLC_CONVERT_FLOATN(p)));
+}
+
+// Scalar normalize: the result is __clc_sign(p)
+#elif defined(__CLC_SCALAR)
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
+  return __clc_sign(p);
+}
+
+// Vector normalize
+#else
+
+// Precision-specific constants used below:
+//   MIN_VAL  - squared lengths under this value trigger an upward rescale
+//   MAX_SQRT - factor applied to p for that upward rescale
+//   MIN_SQRT - factor applied to p when the squared length is infinite;
+//              2-component vectors use a factor twice as large as 3- and
+//              4-component vectors
+#if __CLC_FPSIZE == 32
+
+#define MIN_VAL FLT_MIN
+#define MAX_SQRT 0x1.0p+86F
+#if __CLC_VECSIZE_OR_1 == 2
+#define MIN_SQRT 0x1.0p-65F
+#else
+#define MIN_SQRT 0x1.0p-66F
+#endif
+
+#elif __CLC_FPSIZE == 64
+
+#define MIN_VAL DBL_MIN
+#define MAX_SQRT 0x1.0p+563
+#if __CLC_VECSIZE_OR_1 == 2
+#define MIN_SQRT 0x1.0p-513
+#else
+#define MIN_SQRT 0x1.0p-514
+#endif
+
+#else
+#error "Invalid FP size"
+#endif
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
+  // An all-zero input is returned as-is.
+  if (__clc_all(p == __CLC_FP_LIT(0.0))) {
+    return p;
+  }
+
+  __CLC_SCALAR_GENTYPE len_sq = __clc_dot(p, p);
+
+  if (len_sq == INFINITY) {
+    // Squared length is infinite: shrink p and measure again.
+    p *= MIN_SQRT;
+    len_sq = __clc_dot(p, p);
+    if (len_sq == INFINITY) {
+      // Still infinite after shrinking: rebuild p from 0.0 and 1.0,
+      // selected by __clc_isinf(p), and give each component the sign of the
+      // corresponding input component.
+      __CLC_GENTYPE unit_or_zero =
+          __clc_select((__CLC_GENTYPE)__CLC_FP_LIT(0.0),
+                       (__CLC_GENTYPE)__CLC_FP_LIT(1.0), __clc_isinf(p));
+      p = __clc_copysign(unit_or_zero, p);
+      len_sq = __clc_dot(p, p);
+    }
+  } else if (len_sq < MIN_VAL) {
+    // Squared length is below MIN_VAL: enlarge p and measure again.
+    p *= MAX_SQRT;
+    len_sq = __clc_dot(p, p);
+  }
+
+  return p * __clc_rsqrt(len_sq);
+}
+
+#undef MIN_VAL
+#undef MIN_SQRT
+#undef MAX_SQRT
+
+#endif
+
+#endif
diff --git a/libclc/opencl/lib/generic/geometric/fast_normalize.cl b/libclc/opencl/lib/generic/geometric/fast_normalize.cl
index d6d140f3c406..938b30093b8d 100644
--- a/libclc/opencl/lib/generic/geometric/fast_normalize.cl
+++ b/libclc/opencl/lib/generic/geometric/fast_normalize.cl
@@ -6,11 +6,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include
 #include
 
-_CLC_OVERLOAD _CLC_DEF float fast_normalize(float p) { return normalize(p); }
-
-#define __CLC_BODY
+#define FUNCTION fast_normalize
 #define __FLOAT_ONLY
+#define __CLC_GEOMETRIC_RET_GENTYPE
+#define __CLC_BODY
+
 #include
-#undef __FLOAT_ONLY
diff --git a/libclc/opencl/lib/generic/geometric/fast_normalize.inc b/libclc/opencl/lib/generic/geometric/fast_normalize.inc
deleted file mode 100644
index ec1be94efc27..000000000000
--- a/libclc/opencl/lib/generic/geometric/fast_normalize.inc
+++ /dev/null
@@ -1,19 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// Geometric functions are only defined for scalar, vec2, vec3 and vec4 -// Only handle vector implementations -#if (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 3 || \ - __CLC_VECSIZE_OR_1 == 4) - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fast_normalize(__CLC_GENTYPE p) { - __CLC_SCALAR_GENTYPE l2 = dot(p, p); - return l2 == 0.0f ? p : p * half_rsqrt(l2); -} - -#endif diff --git a/libclc/opencl/lib/generic/geometric/normalize.cl b/libclc/opencl/lib/generic/geometric/normalize.cl index e1e51b306a1b..914f493f2cb2 100644 --- a/libclc/opencl/lib/generic/geometric/normalize.cl +++ b/libclc/opencl/lib/generic/geometric/normalize.cl @@ -6,134 +6,11 @@ // //===----------------------------------------------------------------------===// +#include #include -_CLC_OVERLOAD _CLC_DEF float normalize(float p) { return sign(p); } +#define FUNCTION normalize +#define __CLC_GEOMETRIC_RET_GENTYPE +#define __CLC_BODY -_CLC_OVERLOAD _CLC_DEF float2 normalize(float2 p) { - if (all(p == (float2)0.0F)) - return p; - - float l2 = dot(p, p); - - if (l2 < FLT_MIN) { - p *= 0x1.0p+86F; - l2 = dot(p, p); - } else if (l2 == INFINITY) { - p *= 0x1.0p-65f; - l2 = dot(p, p); - if (l2 == INFINITY) { - p = copysign(select((float2)0.0F, (float2)1.0F, isinf(p)), p); - l2 = dot(p, p); - } - } - return p * rsqrt(l2); -} - -_CLC_OVERLOAD _CLC_DEF float3 normalize(float3 p) { - if (all(p == (float3)0.0F)) - return p; - - float l2 = dot(p, p); - - if (l2 < FLT_MIN) { - p *= 0x1.0p+86F; - l2 = dot(p, p); - } else if (l2 == INFINITY) { - p *= 0x1.0p-66f; - l2 = dot(p, p); - if (l2 == INFINITY) { - p = copysign(select((float3)0.0F, (float3)1.0F, isinf(p)), p); - l2 = dot(p, p); - } - } - return p * rsqrt(l2); -} - -_CLC_OVERLOAD _CLC_DEF float4 normalize(float4 p) { - if (all(p == (float4)0.0F)) - return p; - - float l2 = dot(p, p); - - if (l2 < FLT_MIN) { - p *= 
0x1.0p+86F; - l2 = dot(p, p); - } else if (l2 == INFINITY) { - p *= 0x1.0p-66f; - l2 = dot(p, p); - if (l2 == INFINITY) { - p = copysign(select((float4)0.0F, (float4)1.0F, isinf(p)), p); - l2 = dot(p, p); - } - } - return p * rsqrt(l2); -} - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double normalize(double p) { return sign(p); } - -_CLC_OVERLOAD _CLC_DEF double2 normalize(double2 p) { - if (all(p == (double2)0.0)) - return p; - - double l2 = dot(p, p); - - if (l2 < DBL_MIN) { - p *= 0x1.0p+563; - l2 = dot(p, p); - } else if (l2 == INFINITY) { - p *= 0x1.0p-513; - l2 = dot(p, p); - if (l2 == INFINITY) { - p = copysign(select((double2)0.0, (double2)1.0, isinf(p)), p); - l2 = dot(p, p); - } - } - return p * rsqrt(l2); -} - -_CLC_OVERLOAD _CLC_DEF double3 normalize(double3 p) { - if (all(p == (double3)0.0)) - return p; - - double l2 = dot(p, p); - - if (l2 < DBL_MIN) { - p *= 0x1.0p+563; - l2 = dot(p, p); - } else if (l2 == INFINITY) { - p *= 0x1.0p-514; - l2 = dot(p, p); - if (l2 == INFINITY) { - p = copysign(select((double3)0.0, (double3)1.0, isinf(p)), p); - l2 = dot(p, p); - } - } - return p * rsqrt(l2); -} - -_CLC_OVERLOAD _CLC_DEF double4 normalize(double4 p) { - if (all(p == (double4)0.0)) - return p; - - double l2 = dot(p, p); - - if (l2 < DBL_MIN) { - p *= 0x1.0p+563; - l2 = dot(p, p); - } else if (l2 == INFINITY) { - p *= 0x1.0p-514; - l2 = dot(p, p); - if (l2 == INFINITY) { - p = copysign(select((double4)0.0, (double4)1.0, isinf(p)), p); - l2 = dot(p, p); - } - } - return p * rsqrt(l2); -} - -#endif +#include