[libclc] Add (fast) normalize to CLC; add half overloads (#139759)
For simplicity the half overloads just call into the float versions of the builtin. Otherwise there are no codegen changes to any target.
This commit is contained in:
22
libclc/clc/include/clc/geometric/clc_fast_normalize.h
Normal file
22
libclc/clc/include/clc/geometric/clc_fast_normalize.h
Normal file
@@ -0,0 +1,22 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__
|
||||
#define __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__
|
||||
|
||||
#define __FLOAT_ONLY
|
||||
#define __CLC_GEOMETRIC_RET_GENTYPE
|
||||
#define __CLC_FUNCTION __clc_fast_normalize
|
||||
#define __CLC_BODY <clc/geometric/unary_decl.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
#undef __CLC_FUNCTION
|
||||
#undef __CLC_GEOMETRIC_RET_GENTYPE
|
||||
#undef __FLOAT_ONLY
|
||||
|
||||
#endif // __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__
|
||||
20
libclc/clc/include/clc/geometric/clc_normalize.h
Normal file
20
libclc/clc/include/clc/geometric/clc_normalize.h
Normal file
@@ -0,0 +1,20 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef __CLC_GEOMETRIC_CLC_NORMALIZE_H__
|
||||
#define __CLC_GEOMETRIC_CLC_NORMALIZE_H__
|
||||
|
||||
#define __CLC_GEOMETRIC_RET_GENTYPE
|
||||
#define __CLC_FUNCTION __clc_normalize
|
||||
#define __CLC_BODY <clc/geometric/unary_decl.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
#undef __CLC_FUNCTION
|
||||
#undef __CLC_GEOMETRIC_RET_GENTYPE
|
||||
|
||||
#endif // __CLC_GEOMETRIC_CLC_NORMALIZE_H__
|
||||
@@ -9,7 +9,9 @@ geometric/clc_distance.cl
|
||||
geometric/clc_dot.cl
|
||||
geometric/clc_fast_distance.cl
|
||||
geometric/clc_fast_length.cl
|
||||
geometric/clc_fast_normalize.cl
|
||||
geometric/clc_length.cl
|
||||
geometric/clc_normalize.cl
|
||||
integer/clc_abs.cl
|
||||
integer/clc_abs_diff.cl
|
||||
integer/clc_add_sat.cl
|
||||
|
||||
15
libclc/clc/lib/generic/geometric/clc_fast_normalize.cl
Normal file
15
libclc/clc/lib/generic/geometric/clc_fast_normalize.cl
Normal file
@@ -0,0 +1,15 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/geometric/clc_dot.h>
|
||||
#include <clc/geometric/clc_normalize.h>
|
||||
#include <clc/math/clc_half_rsqrt.h>
|
||||
|
||||
#define __FLOAT_ONLY
|
||||
#define __CLC_BODY <clc_fast_normalize.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
23
libclc/clc/lib/generic/geometric/clc_fast_normalize.inc
Normal file
23
libclc/clc/lib/generic/geometric/clc_fast_normalize.inc
Normal file
@@ -0,0 +1,23 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if __CLC_VECSIZE_OR_1 == 1
|
||||
|
||||
// Scalar overload (compiled when __CLC_VECSIZE_OR_1 == 1): there is no fast
// path here, the call is forwarded unchanged to __clc_normalize.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_fast_normalize(__CLC_GENTYPE p) {
|
||||
return __clc_normalize(p);
|
||||
}
|
||||
|
||||
#elif (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 3 || \
|
||||
__CLC_VECSIZE_OR_1 == 4)
|
||||
|
||||
// Vector overload (compiled for 2-, 3- and 4-element vectors): multiplies p by
// __clc_half_rsqrt of its squared length.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_fast_normalize(__CLC_GENTYPE p) {
|
||||
// Squared length: dot product of p with itself.
__CLC_SCALAR_GENTYPE l2 = __clc_dot(p, p);
|
||||
// When the squared length compares equal to zero, p is returned as-is and the
// reciprocal square root is not applied.
return l2 == 0.0f ? p : p * __clc_half_rsqrt(l2);
|
||||
}
|
||||
|
||||
#endif
|
||||
22
libclc/clc/lib/generic/geometric/clc_normalize.cl
Normal file
22
libclc/clc/lib/generic/geometric/clc_normalize.cl
Normal file
@@ -0,0 +1,22 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc_convert.h>
|
||||
#include <clc/common/clc_sign.h>
|
||||
#include <clc/float/definitions.h>
|
||||
#include <clc/geometric/clc_dot.h>
|
||||
#include <clc/geometric/clc_normalize.h>
|
||||
#include <clc/internal/clc.h>
|
||||
#include <clc/math/clc_copysign.h>
|
||||
#include <clc/math/clc_rsqrt.h>
|
||||
#include <clc/relational/clc_all.h>
|
||||
#include <clc/relational/clc_isinf.h>
|
||||
#include <clc/relational/clc_select.h>
|
||||
|
||||
#define __CLC_BODY <clc_normalize.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
84
libclc/clc/lib/generic/geometric/clc_normalize.inc
Normal file
84
libclc/clc/lib/generic/geometric/clc_normalize.inc
Normal file
@@ -0,0 +1,84 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if (__CLC_VECSIZE_OR_1 == 1 || __CLC_VECSIZE_OR_1 == 2 || \
|
||||
__CLC_VECSIZE_OR_1 == 3 || __CLC_VECSIZE_OR_1 == 4)
|
||||
|
||||
// Until we have a native FP16 implementation, go via FP32
|
||||
#if __CLC_FPSIZE == 16
|
||||
|
||||
// FP16 overload (compiled when __CLC_FPSIZE == 16): converts p with
// __CLC_CONVERT_FLOATN, calls __clc_normalize on the converted value, and
// converts the result back with __CLC_CONVERT_GENTYPE.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
|
||||
return __CLC_CONVERT_GENTYPE(__clc_normalize(__CLC_CONVERT_FLOATN(p)));
|
||||
}
|
||||
|
||||
// Scalar normalize
|
||||
#elif defined(__CLC_SCALAR)
|
||||
|
||||
// Scalar overload (compiled when __CLC_SCALAR is defined and __CLC_FPSIZE is
// not 16): the result is simply __clc_sign(p).
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
|
||||
return __clc_sign(p);
|
||||
}
|
||||
|
||||
// Vector normalize
|
||||
#else
|
||||
|
||||
// Constants used by the vector __clc_normalize that follows:
//   MIN_VAL  - threshold the squared length is compared against; below it the
//              input is scaled up.
//   MAX_SQRT - factor the input is multiplied by in that scale-up case.
//   MIN_SQRT - factor the input is multiplied by when the squared length is
//              INFINITY.
// Values are chosen per element size (32- or 64-bit); MIN_SQRT additionally
// uses a different exponent for 2-element vectors than for the other sizes.
#if __CLC_FPSIZE == 32
|
||||
|
||||
#define MIN_VAL FLT_MIN
|
||||
#define MAX_SQRT 0x1.0p+86F
|
||||
#if __CLC_VECSIZE_OR_1 == 2
|
||||
#define MIN_SQRT 0x1.0p-65F
|
||||
#else
|
||||
#define MIN_SQRT 0x1.0p-66F
|
||||
#endif
|
||||
|
||||
#elif __CLC_FPSIZE == 64
|
||||
|
||||
#define MIN_VAL DBL_MIN
|
||||
#define MAX_SQRT 0x1.0p+563
|
||||
#if __CLC_VECSIZE_OR_1 == 2
|
||||
#define MIN_SQRT 0x1.0p-513
|
||||
#else
|
||||
#define MIN_SQRT 0x1.0p-514
|
||||
#endif
|
||||
|
||||
#else
|
||||
// Any other element size is rejected at compile time.
|
||||
#error "Invalid FP size"
|
||||
#endif
|
||||
|
||||
// Vector overload for 32- and 64-bit element types: returns p multiplied by
// __clc_rsqrt of its squared length. Before that final step p is rescaled when
// the squared length is below MIN_VAL or equal to INFINITY, so that the value
// passed to __clc_rsqrt is recomputed from the rescaled vector.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
|
||||
// A vector whose components all compare equal to zero is returned unchanged.
if (__clc_all(p == __CLC_FP_LIT(0.0))) {
|
||||
return p;
|
||||
}
|
||||
|
||||
// Squared length: dot product of p with itself.
__CLC_SCALAR_GENTYPE l2 = __clc_dot(p, p);
|
||||
|
||||
// Squared length below MIN_VAL: scale p up by MAX_SQRT and recompute l2.
if (l2 < MIN_VAL) {
|
||||
p *= MAX_SQRT;
|
||||
l2 = __clc_dot(p, p);
|
||||
} else if (l2 == INFINITY) {
|
||||
// Squared length is infinite: scale p down by MIN_SQRT and recompute l2.
p *= MIN_SQRT;
|
||||
l2 = __clc_dot(p, p);
|
||||
if (l2 == INFINITY) {
|
||||
// Still infinite after scaling down: rebuild p from __clc_select between
// 0.0 and 1.0 keyed on __clc_isinf(p), with the sign of the old p applied
// by __clc_copysign, then recompute l2.
// NOTE(review): presumably 1.0 is chosen for infinite components and 0.0
// for the rest, as with OpenCL select() - confirm against __clc_select.
p = __clc_copysign(__clc_select((__CLC_GENTYPE)__CLC_FP_LIT(0.0),
|
||||
(__CLC_GENTYPE)__CLC_FP_LIT(1.0),
|
||||
__clc_isinf(p)),
|
||||
p);
|
||||
l2 = __clc_dot(p, p);
|
||||
}
|
||||
}
|
||||
return p * __clc_rsqrt(l2);
|
||||
}
|
||||
|
||||
#undef MIN_VAL
|
||||
#undef MIN_SQRT
|
||||
#undef MAX_SQRT
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -6,11 +6,12 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/geometric/clc_fast_normalize.h>
|
||||
#include <clc/opencl/clc.h>
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF float fast_normalize(float p) { return normalize(p); }
|
||||
|
||||
#define __CLC_BODY <fast_normalize.inc>
|
||||
#define FUNCTION fast_normalize
|
||||
#define __FLOAT_ONLY
|
||||
#define __CLC_GEOMETRIC_RET_GENTYPE
|
||||
#define __CLC_BODY <clc/geometric/unary_def.inc>
|
||||
|
||||
#include <clc/math/gentype.inc>
|
||||
#undef __FLOAT_ONLY
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Geometric functions are only defined for scalar, vec2, vec3 and vec4
|
||||
// Only handle vector implementations
|
||||
#if (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 3 || \
|
||||
__CLC_VECSIZE_OR_1 == 4)
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fast_normalize(__CLC_GENTYPE p) {
|
||||
__CLC_SCALAR_GENTYPE l2 = dot(p, p);
|
||||
return l2 == 0.0f ? p : p * half_rsqrt(l2);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -6,134 +6,11 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/geometric/clc_normalize.h>
|
||||
#include <clc/opencl/clc.h>
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF float normalize(float p) { return sign(p); }
|
||||
#define FUNCTION normalize
|
||||
#define __CLC_GEOMETRIC_RET_GENTYPE
|
||||
#define __CLC_BODY <clc/geometric/unary_def.inc>
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF float2 normalize(float2 p) {
|
||||
if (all(p == (float2)0.0F))
|
||||
return p;
|
||||
|
||||
float l2 = dot(p, p);
|
||||
|
||||
if (l2 < FLT_MIN) {
|
||||
p *= 0x1.0p+86F;
|
||||
l2 = dot(p, p);
|
||||
} else if (l2 == INFINITY) {
|
||||
p *= 0x1.0p-65f;
|
||||
l2 = dot(p, p);
|
||||
if (l2 == INFINITY) {
|
||||
p = copysign(select((float2)0.0F, (float2)1.0F, isinf(p)), p);
|
||||
l2 = dot(p, p);
|
||||
}
|
||||
}
|
||||
return p * rsqrt(l2);
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF float3 normalize(float3 p) {
|
||||
if (all(p == (float3)0.0F))
|
||||
return p;
|
||||
|
||||
float l2 = dot(p, p);
|
||||
|
||||
if (l2 < FLT_MIN) {
|
||||
p *= 0x1.0p+86F;
|
||||
l2 = dot(p, p);
|
||||
} else if (l2 == INFINITY) {
|
||||
p *= 0x1.0p-66f;
|
||||
l2 = dot(p, p);
|
||||
if (l2 == INFINITY) {
|
||||
p = copysign(select((float3)0.0F, (float3)1.0F, isinf(p)), p);
|
||||
l2 = dot(p, p);
|
||||
}
|
||||
}
|
||||
return p * rsqrt(l2);
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF float4 normalize(float4 p) {
|
||||
if (all(p == (float4)0.0F))
|
||||
return p;
|
||||
|
||||
float l2 = dot(p, p);
|
||||
|
||||
if (l2 < FLT_MIN) {
|
||||
p *= 0x1.0p+86F;
|
||||
l2 = dot(p, p);
|
||||
} else if (l2 == INFINITY) {
|
||||
p *= 0x1.0p-66f;
|
||||
l2 = dot(p, p);
|
||||
if (l2 == INFINITY) {
|
||||
p = copysign(select((float4)0.0F, (float4)1.0F, isinf(p)), p);
|
||||
l2 = dot(p, p);
|
||||
}
|
||||
}
|
||||
return p * rsqrt(l2);
|
||||
}
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF double normalize(double p) { return sign(p); }
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF double2 normalize(double2 p) {
|
||||
if (all(p == (double2)0.0))
|
||||
return p;
|
||||
|
||||
double l2 = dot(p, p);
|
||||
|
||||
if (l2 < DBL_MIN) {
|
||||
p *= 0x1.0p+563;
|
||||
l2 = dot(p, p);
|
||||
} else if (l2 == INFINITY) {
|
||||
p *= 0x1.0p-513;
|
||||
l2 = dot(p, p);
|
||||
if (l2 == INFINITY) {
|
||||
p = copysign(select((double2)0.0, (double2)1.0, isinf(p)), p);
|
||||
l2 = dot(p, p);
|
||||
}
|
||||
}
|
||||
return p * rsqrt(l2);
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF double3 normalize(double3 p) {
|
||||
if (all(p == (double3)0.0))
|
||||
return p;
|
||||
|
||||
double l2 = dot(p, p);
|
||||
|
||||
if (l2 < DBL_MIN) {
|
||||
p *= 0x1.0p+563;
|
||||
l2 = dot(p, p);
|
||||
} else if (l2 == INFINITY) {
|
||||
p *= 0x1.0p-514;
|
||||
l2 = dot(p, p);
|
||||
if (l2 == INFINITY) {
|
||||
p = copysign(select((double3)0.0, (double3)1.0, isinf(p)), p);
|
||||
l2 = dot(p, p);
|
||||
}
|
||||
}
|
||||
return p * rsqrt(l2);
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF double4 normalize(double4 p) {
|
||||
if (all(p == (double4)0.0))
|
||||
return p;
|
||||
|
||||
double l2 = dot(p, p);
|
||||
|
||||
if (l2 < DBL_MIN) {
|
||||
p *= 0x1.0p+563;
|
||||
l2 = dot(p, p);
|
||||
} else if (l2 == INFINITY) {
|
||||
p *= 0x1.0p-514;
|
||||
l2 = dot(p, p);
|
||||
if (l2 == INFINITY) {
|
||||
p = copysign(select((double4)0.0, (double4)1.0, isinf(p)), p);
|
||||
l2 = dot(p, p);
|
||||
}
|
||||
}
|
||||
return p * rsqrt(l2);
|
||||
}
|
||||
|
||||
#endif
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
Reference in New Issue
Block a user