[libclc] Move fdim to CLC library; simplify (#137811)

This commit moves the fdim builtin to the CLC library. It simultaneously simplifies the codegen, unifying it between scalar and vector and avoiding bithacking for vector types.
2025-04-29 16:41:07 +01:00
parent bd3dde0f87
commit ea688c031e
7 changed files with 61 additions and 84 deletions
--- a/libclc/clc/include/clc/math/clc_fdim.h
+++ b/libclc/clc/include/clc/math/clc_fdim.h
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_FDIM_H__
+#define __CLC_MATH_CLC_FDIM_H__
+
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+#define __CLC_FUNCTION __clc_fdim
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_FDIM_H__
--- a/libclc/clc/include/clc/math/gentype.inc
+++ b/libclc/clc/include/clc/math/gentype.inc
@@ -57,6 +57,8 @@
 #define __CLC_CONVERT_UINTN __CLC_XCONCAT(__clc_convert_, __CLC_UINTN)
 #define __CLC_CONVERT_ULONGN __CLC_XCONCAT(__clc_convert_, __CLC_ULONGN)

+#define __CLC_CONVERT_BIT_INTN __CLC_XCONCAT(__clc_convert_, __CLC_BIT_INTN)
+
 // See definitions of __CLC_S_GENTYPE/__CLC_U_GENTYPE below, which depend on the
 // specific size of floating-point type. These are the signed and unsigned
 // integers of the same bitwidth and element count as the GENTYPE. They match
@@ -329,6 +331,8 @@
 #undef __CLC_CONVERT_UINTN
 #undef __CLC_CONVERT_ULONGN

+#undef __CLC_CONVERT_BIT_INTN
+
 #undef __CLC_ULONGN
 #undef __CLC_UINTN
 #undef __CLC_USHORTN
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@@ -41,6 +41,7 @@ math/clc_exp2.cl
 math/clc_expm1.cl
 math/clc_exp_helper.cl
 math/clc_fabs.cl
+math/clc_fdim.cl
 math/clc_fma.cl
 math/clc_fmax.cl
 math/clc_fmin.cl
--- a/libclc/clc/lib/generic/math/clc_fdim.cl
+++ b/libclc/clc/lib/generic/math/clc_fdim.cl
@@ -0,0 +1,18 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc_convert.h>
+#include <clc/float/definitions.h>
+#include <clc/internal/clc.h>
+#include <clc/math/clc_fmax.h>
+#include <clc/math/math.h>
+#include <clc/relational/clc_isnan.h>
+#include <clc/relational/clc_select.h>
+
+#define __CLC_BODY <clc_fdim.inc>
+#include <clc/math/gentype.inc>
--- a/libclc/clc/lib/generic/math/clc_fdim.inc
+++ b/libclc/clc/lib/generic/math/clc_fdim.inc
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_fdim(__CLC_GENTYPE x,
+                                                __CLC_GENTYPE y) { // fdim: x - y clamped below at zero, or NaN if x or y is NaN
+  return __clc_select( // NOTE(review): assumes OpenCL select(a, b, c) semantics (b chosen where c is set) - confirm
+      __builtin_elementwise_max(x - y, (__CLC_GENTYPE)__CLC_FP_LIT(0.0)), // a: larger of (x - y) and zero, per element
+      __CLC_GENTYPE_NAN, // b: NaN result, overriding a when an input is NaN
+      __CLC_CONVERT_BIT_INTN(__clc_isnan(x) || __clc_isnan(y))); // c: isnan(x) || isnan(y), converted to the __CLC_BIT_INTN mask type
+}
--- a/libclc/generic/lib/math/fdim.cl
+++ b/libclc/generic/lib/math/fdim.cl
@@ -7,7 +7,8 @@
 //===----------------------------------------------------------------------===//

 #include <clc/clc.h>
-#include <clc/math/math.h>
+#include <clc/math/clc_fdim.h>

-#define __CLC_BODY <fdim.inc>
+#define FUNCTION fdim
+#define __CLC_BODY <clc/shared/binary_def.inc>
 #include <clc/math/gentype.inc>
--- a/libclc/generic/lib/math/fdim.inc
+++ b/libclc/generic/lib/math/fdim.inc
@@ -1,82 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#if __CLC_FPSIZE == 32
-#ifdef __CLC_SCALAR
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fdim(__CLC_GENTYPE x, __CLC_GENTYPE y) {
-    if (__builtin_isnan(x) || __builtin_isnan(y))
-        return as_float(QNANBITPATT_SP32);
-    return fmax(x - y, 0.0f);
-}
-#define __CLC_FDIM_VEC(width) \
-_CLC_OVERLOAD _CLC_DEF float##width fdim(float##width x, float##width y) { \
-    /* Determine if x or y is NaN. */ \
-    /* Vector true is -1, i.e. all-bits-set, and NaN==NaN is false. */ \
-    /* If either is NaN, then ~((x==x) & (y==y)) will be 0 (e.g. ~(-1)), as will n. */ \
-    int##width n = ~((x == x) & (y == y)) & QNANBITPATT_SP32; \
-    /* Calculate x-y if x>y, otherwise positive 0, again taking */ \
-    /* advantage of vector true being all-bits-set. */ \
-    int##width r = (x > y) & as_int##width(x - y); \
-    return as_float##width(n | r); \
-}
-__CLC_FDIM_VEC(2)
-__CLC_FDIM_VEC(3)
-__CLC_FDIM_VEC(4)
-__CLC_FDIM_VEC(8)
-__CLC_FDIM_VEC(16)
-#undef __CLC_FDIM_VEC
-#endif
-#endif
-
-#if __CLC_FPSIZE == 64
-#ifdef __CLC_SCALAR
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fdim(__CLC_GENTYPE x, private __CLC_GENTYPE y) {
-    long n = -(isnan(x) | isnan(y)) & QNANBITPATT_DP64;
-    long r = -(x > y) & as_long(x - y);
-    return as_double(n | r);
-}
-#define __CLC_FDIM_VEC(width) \
-_CLC_OVERLOAD _CLC_DEF double##width fdim(double##width x, double##width y) { \
-    /* See comment in float implementation for explanation. */ \
-    long##width n = ~((x == x) & (y == y)) & QNANBITPATT_DP64; \
-    long##width r = (x > y) & as_long##width(x - y); \
-    return as_double##width(n | r); \
-}
-__CLC_FDIM_VEC(2)
-__CLC_FDIM_VEC(3)
-__CLC_FDIM_VEC(4)
-__CLC_FDIM_VEC(8)
-__CLC_FDIM_VEC(16)
-#undef __CLC_FDIM_VEC
-#endif
-#endif
-
-#if __CLC_FPSIZE == 16
-#ifdef __CLC_SCALAR
-#define QNANBITPATT_FP16 ((short)0x7e00)
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fdim(__CLC_GENTYPE x,
-                                          private __CLC_GENTYPE y) {
-  short n = -(isnan(x) | isnan(y)) & QNANBITPATT_FP16;
-  short r = -(x > y) & as_short(x - y);
-  return as_half((short)(n | r));
-}
-#define __CLC_FDIM_VEC(width)                                                  \
-  _CLC_OVERLOAD _CLC_DEF half##width fdim(half##width x, half##width y) {      \
-    /* See comment in float implementation for explanation. */                 \
-    short##width n = ~((x == x) & (y == y)) & QNANBITPATT_FP16;                \
-    short##width r = (x > y) & as_short##width(x - y);                         \
-    return as_half##width(n | r);                                              \
-  }
-__CLC_FDIM_VEC(2)
-__CLC_FDIM_VEC(3)
-__CLC_FDIM_VEC(4)
-__CLC_FDIM_VEC(8)
-__CLC_FDIM_VEC(16)
-#undef __CLC_FDIM_VEC
-#endif
-#endif