[libclc] Move fdim to CLC library; simplify (#137811)
This commit moves the fdim builtin to the CLC library. It simultaneously simplifies the codegen, unifying it between scalar and vector and avoiding bithacking for vector types.
This commit is contained in:
20
libclc/clc/include/clc/math/clc_fdim.h
Normal file
20
libclc/clc/include/clc/math/clc_fdim.h
Normal file
@@ -0,0 +1,20 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Declaration header for __clc_fdim, protected by an include guard.
#ifndef __CLC_MATH_CLC_FDIM_H__
|
||||
#define __CLC_MATH_CLC_FDIM_H__
|
||||
|
||||
// __CLC_BODY names the template that gentype.inc includes and __CLC_FUNCTION
// names the function the template refers to.
// NOTE(review): presumably this emits one two-operand __clc_fdim declaration
// per floating-point gentype - confirm against binary_decl.inc/gentype.inc.
#define __CLC_BODY <clc/shared/binary_decl.inc>
|
||||
#define __CLC_FUNCTION __clc_fdim
|
||||
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
// Undefine the template parameters so they do not leak into later includes.
#undef __CLC_BODY
|
||||
#undef __CLC_FUNCTION
|
||||
|
||||
#endif // __CLC_MATH_CLC_FDIM_H__
|
||||
@@ -57,6 +57,8 @@
|
||||
#define __CLC_CONVERT_UINTN __CLC_XCONCAT(__clc_convert_, __CLC_UINTN)
|
||||
#define __CLC_CONVERT_ULONGN __CLC_XCONCAT(__clc_convert_, __CLC_ULONGN)
|
||||
|
||||
#define __CLC_CONVERT_BIT_INTN __CLC_XCONCAT(__clc_convert_, __CLC_BIT_INTN)
|
||||
|
||||
// See definitions of __CLC_S_GENTYPE/__CLC_U_GENTYPE below, which depend on the
|
||||
// specific size of floating-point type. These are the signed and unsigned
|
||||
// integers of the same bitwidth and element count as the GENTYPE. They match
|
||||
@@ -329,6 +331,8 @@
|
||||
#undef __CLC_CONVERT_UINTN
|
||||
#undef __CLC_CONVERT_ULONGN
|
||||
|
||||
#undef __CLC_CONVERT_BIT_INTN
|
||||
|
||||
#undef __CLC_ULONGN
|
||||
#undef __CLC_UINTN
|
||||
#undef __CLC_USHORTN
|
||||
|
||||
@@ -41,6 +41,7 @@ math/clc_exp2.cl
|
||||
math/clc_expm1.cl
|
||||
math/clc_exp_helper.cl
|
||||
math/clc_fabs.cl
|
||||
math/clc_fdim.cl
|
||||
math/clc_fma.cl
|
||||
math/clc_fmax.cl
|
||||
math/clc_fmin.cl
|
||||
|
||||
18
libclc/clc/lib/generic/math/clc_fdim.cl
Normal file
18
libclc/clc/lib/generic/math/clc_fdim.cl
Normal file
@@ -0,0 +1,18 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc_convert.h>
|
||||
#include <clc/float/definitions.h>
|
||||
#include <clc/internal/clc.h>
|
||||
#include <clc/math/clc_fmax.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/relational/clc_isnan.h>
|
||||
#include <clc/relational/clc_select.h>
|
||||
|
||||
#define __CLC_BODY <clc_fdim.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
15
libclc/clc/lib/generic/math/clc_fdim.inc
Normal file
15
libclc/clc/lib/generic/math/clc_fdim.inc
Normal file
@@ -0,0 +1,15 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Positive difference of x and y, written once for every __CLC_GENTYPE
// (the same body serves scalar and vector types).
//
// The result is chosen by __clc_select between:
//   - the element-wise maximum of (x - y) and 0.0, and
//   - __CLC_GENTYPE_NAN,
// with the condition "x is NaN or y is NaN".
// NOTE(review): assumes __clc_select(a, b, c) follows OpenCL select()
// semantics (b where c is true, else a), so NaN is returned whenever either
// input is NaN - confirm against clc_select.h.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_fdim(__CLC_GENTYPE x,
|
||||
__CLC_GENTYPE y) {
|
||||
return __clc_select(
|
||||
// Clamp the difference from below at zero.
__builtin_elementwise_max(x - y, (__CLC_GENTYPE)__CLC_FP_LIT(0.0)),
|
||||
__CLC_GENTYPE_NAN,
|
||||
// The NaN test result is converted with __CLC_CONVERT_BIT_INTN before being
// used as the select condition.
__CLC_CONVERT_BIT_INTN(__clc_isnan(x) || __clc_isnan(y)));
|
||||
}
|
||||
@@ -7,7 +7,8 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/math/clc_fdim.h>
|
||||
|
||||
#define __CLC_BODY <fdim.inc>
|
||||
#define FUNCTION fdim
|
||||
#define __CLC_BODY <clc/shared/binary_def.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
||||
@@ -1,82 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#if __CLC_FPSIZE == 32
|
||||
#ifdef __CLC_SCALAR
|
||||
// Scalar 32-bit fdim (only compiled when __CLC_FPSIZE == 32 and __CLC_SCALAR
// is defined): returns the bit pattern QNANBITPATT_SP32 reinterpreted as a
// float when either argument is NaN, otherwise fmax(x - y, 0.0f).
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fdim(__CLC_GENTYPE x, __CLC_GENTYPE y) {
|
||||
// NaN in either operand short-circuits to the NaN result.
if (__builtin_isnan(x) || __builtin_isnan(y))
|
||||
return as_float(QNANBITPATT_SP32);
|
||||
// The difference is clamped from below at 0.0f.
return fmax(x - y, 0.0f);
|
||||
}
|
||||
// Defines fdim for float##width using integer bit masks instead of branches:
// n carries the NaN bit pattern (or 0), r carries the bits of x - y (or 0),
// and the result is the two OR-ed together and reinterpreted as float##width.
#define __CLC_FDIM_VEC(width) \
|
||||
_CLC_OVERLOAD _CLC_DEF float##width fdim(float##width x, float##width y) { \
|
||||
/* Determine if x or y is NaN. */ \
|
||||
/* Vector true is -1, i.e. all-bits-set, and NaN==NaN is false. */ \
|
||||
/* If neither is NaN, then ~((x==x) & (y==y)) will be 0 (e.g. ~(-1)), as will n; otherwise the mask is all-bits-set and n is QNANBITPATT_SP32. */ \
|
||||
int##width n = ~((x == x) & (y == y)) & QNANBITPATT_SP32; \
|
||||
/* Calculate x-y if x>y, otherwise positive 0, again taking */ \
|
||||
/* advantage of vector true being all-bits-set. */ \
|
||||
int##width r = (x > y) & as_int##width(x - y); \
|
||||
return as_float##width(n | r); \
|
||||
}
|
||||
__CLC_FDIM_VEC(2)
|
||||
__CLC_FDIM_VEC(3)
|
||||
__CLC_FDIM_VEC(4)
|
||||
__CLC_FDIM_VEC(8)
|
||||
__CLC_FDIM_VEC(16)
|
||||
#undef __CLC_FDIM_VEC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if __CLC_FPSIZE == 64
|
||||
#ifdef __CLC_SCALAR
|
||||
// Scalar 64-bit fdim (only compiled when __CLC_FPSIZE == 64 and __CLC_SCALAR
// is defined), computed with integer bit masks rather than branches.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fdim(__CLC_GENTYPE x, private __CLC_GENTYPE y) {
|
||||
// n is QNANBITPATT_DP64 when x or y is NaN, otherwise 0.
// NOTE(review): relies on scalar isnan() returning 1 for true, so that the
// negation yields an all-bits-set mask - confirm.
long n = -(isnan(x) | isnan(y)) & QNANBITPATT_DP64;
|
||||
// r is the bit pattern of x - y when x > y (scalar comparison yields 1,
// negated to all-bits-set), otherwise 0, i.e. the bits of +0.0.
long r = -(x > y) & as_long(x - y);
|
||||
// Combine both masks and reinterpret the bits as a double.
return as_double(n | r);
|
||||
}
|
||||
// Defines fdim for double##width using integer bit masks: n is
// QNANBITPATT_DP64 in lanes where x or y is NaN (0 elsewhere), r holds the
// bits of x - y in lanes where x > y (0 elsewhere); the OR of both is
// reinterpreted as double##width.
#define __CLC_FDIM_VEC(width) \
|
||||
_CLC_OVERLOAD _CLC_DEF double##width fdim(double##width x, double##width y) { \
|
||||
/* See comment in float implementation for explanation. */ \
|
||||
long##width n = ~((x == x) & (y == y)) & QNANBITPATT_DP64; \
|
||||
long##width r = (x > y) & as_long##width(x - y); \
|
||||
return as_double##width(n | r); \
|
||||
}
|
||||
__CLC_FDIM_VEC(2)
|
||||
__CLC_FDIM_VEC(3)
|
||||
__CLC_FDIM_VEC(4)
|
||||
__CLC_FDIM_VEC(8)
|
||||
__CLC_FDIM_VEC(16)
|
||||
#undef __CLC_FDIM_VEC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if __CLC_FPSIZE == 16
|
||||
#ifdef __CLC_SCALAR
|
||||
// Bit pattern used as the NaN result for half precision, typed as short so it
// can be masked against 16-bit integer values.
#define QNANBITPATT_FP16 ((short)0x7e00)
|
||||
// Scalar 16-bit fdim (only compiled when __CLC_FPSIZE == 16 and __CLC_SCALAR
// is defined), computed with integer bit masks rather than branches.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fdim(__CLC_GENTYPE x,
|
||||
private __CLC_GENTYPE y) {
|
||||
// n is QNANBITPATT_FP16 when x or y is NaN, otherwise 0.
// NOTE(review): relies on scalar isnan() returning 1 for true, so that the
// negation yields an all-bits-set mask - confirm.
short n = -(isnan(x) | isnan(y)) & QNANBITPATT_FP16;
|
||||
// r is the bit pattern of x - y when x > y, otherwise 0 (the bits of +0.0).
short r = -(x > y) & as_short(x - y);
|
||||
// n | r is promoted to int, hence the explicit cast back to short before the
// bits are reinterpreted as a half.
return as_half((short)(n | r));
|
||||
}
|
||||
// Defines fdim for half##width using integer bit masks: n is
// QNANBITPATT_FP16 in lanes where x or y is NaN (0 elsewhere), r holds the
// bits of x - y in lanes where x > y (0 elsewhere); the OR of both is
// reinterpreted as half##width.
#define __CLC_FDIM_VEC(width) \
|
||||
_CLC_OVERLOAD _CLC_DEF half##width fdim(half##width x, half##width y) { \
|
||||
/* See comment in float implementation for explanation. */ \
|
||||
short##width n = ~((x == x) & (y == y)) & QNANBITPATT_FP16; \
|
||||
short##width r = (x > y) & as_short##width(x - y); \
|
||||
return as_half##width(n | r); \
|
||||
}
|
||||
__CLC_FDIM_VEC(2)
|
||||
__CLC_FDIM_VEC(3)
|
||||
__CLC_FDIM_VEC(4)
|
||||
__CLC_FDIM_VEC(8)
|
||||
__CLC_FDIM_VEC(16)
|
||||
#undef __CLC_FDIM_VEC
|
||||
#endif
|
||||
#endif
|
||||
Reference in New Issue
Block a user