[libclc] Move logb/ilogb to CLC library; optimize (#128028)

This commit moves the logb and ilogb builtins to the CLC library. It simultaneously optimizes them both for vector types and for half types. Vector types were being scalarized in some cases. Half types were previously promoted to float, whereas this commit provides them with a native implementation. Everything passes the OpenCL-CTS. I had to intuit some magic numbers used by these implementations in order to generate the half variants. I gave them clearer definitions derived from what I believe are their actual component numbers, but named them 'magic' to convey that they weren't derived from first principles.
2025-05-13 11:47:35 +01:00
parent 0e8f0b51ff
commit 95c683fc1b
13 changed files with 259 additions and 99 deletions
--- a/libclc/clc/include/clc/math/clc_ilogb.h
+++ b/libclc/clc/include/clc/math/clc_ilogb.h
@@ -0,0 +1,18 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_ILOGB_H__
+#define __CLC_MATH_CLC_ILOGB_H__
+
+#define __CLC_FUNCTION __clc_ilogb
+#define __CLC_BODY <clc/math/unary_decl_with_int_return.inc>
+#include <clc/math/gentype.inc>
+
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_ILOGB_H__
--- a/libclc/clc/include/clc/math/clc_logb.h
+++ b/libclc/clc/include/clc/math/clc_logb.h
@@ -0,0 +1,18 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_LOGB_H__
+#define __CLC_MATH_CLC_LOGB_H__
+
+#define __CLC_FUNCTION __clc_logb
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+#include <clc/math/gentype.inc>
+
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_LOGB_H__
--- a/libclc/clc/include/clc/math/math.h
+++ b/libclc/clc/include/clc/math/math.h
@@ -52,6 +52,7 @@ bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void);
 #define INDEFBITPATT_SP32 0xffc00000
 #define PINFBITPATT_SP32 0x7f800000
 #define NINFBITPATT_SP32 0xff800000
+#define NUMEXPBITS_SP32 8
 #define EXPBIAS_SP32 127
 #define EXPSHIFTBITS_SP32 23
 #define BIASEDEMIN_SP32 1
@@ -62,6 +63,8 @@ bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void);
 #define MANTLENGTH_SP32 24
 #define BASEDIGITS_SP32 7

+#define LOG_MAGIC_NUM_SP32 (1 + NUMEXPBITS_SP32 - EXPBIAS_SP32) // == -118
+
 _CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) {
  int ix = __clc_as_int(x);
  if (!__clc_fp32_subnormals_supported() && ((ix & EXPBITS_SP32) == 0) &&
@@ -86,6 +89,7 @@ _CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) {
 #define INDEFBITPATT_DP64 0xfff8000000000000L
 #define PINFBITPATT_DP64 0x7ff0000000000000L
 #define NINFBITPATT_DP64 0xfff0000000000000L
+#define NUMEXPBITS_DP64 11
 #define EXPBIAS_DP64 1023
 #define EXPSHIFTBITS_DP64 52
 #define BIASEDEMIN_DP64 1
@@ -96,8 +100,26 @@ _CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) {
 #define MANTLENGTH_DP64 53
 #define BASEDIGITS_DP64 15

+#define LOG_MAGIC_NUM_DP64 (1 + NUMEXPBITS_DP64 - EXPBIAS_DP64) // == -1011
+
 #endif // cl_khr_fp64

+#ifdef cl_khr_fp16
+// Bit-layout constants for 16-bit half: 1 sign, 5 exponent, 10 mantissa bits.
+#define SIGNBIT_FP16 0x8000
+#define EXSIGNBIT_FP16 0x7fff
+#define EXPBITS_FP16 0x7c00
+#define MANTBITS_FP16 0x03ff
+#define PINFBITPATT_FP16 0x7c00
+#define NINFBITPATT_FP16 0xfc00
+#define NUMEXPBITS_FP16 5
+#define EXPBIAS_FP16 15
+#define EXPSHIFTBITS_FP16 10
+
+#define LOG_MAGIC_NUM_FP16 (1 + NUMEXPBITS_FP16 - EXPBIAS_FP16) // == -9
+
+#endif // cl_khr_fp16
+
 #define ALIGNED(x) __attribute__((aligned(x)))

 #endif // __CLC_MATH_MATH_H__
--- a/libclc/clc/include/clc/math/unary_decl_with_int_return.inc
+++ b/libclc/clc/include/clc/math/unary_decl_with_int_return.inc
@@ -6,4 +6,4 @@
 //
 //===----------------------------------------------------------------------===//

-_CLC_OVERLOAD _CLC_DECL __CLC_INTN ilogb(__CLC_GENTYPE x);
+_CLC_OVERLOAD _CLC_DECL __CLC_INTN __CLC_FUNCTION(__CLC_GENTYPE x);
--- a/libclc/clc/include/clc/math/unary_def_with_int_return.inc
+++ b/libclc/clc/include/clc/math/unary_def_with_int_return.inc
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/utils.h>
+
+#ifndef __CLC_FUNCTION
+#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x) // fallback name mapping
+#endif
+// Defines FUNCTION for the current gentype by forwarding to the mapped name.
+_CLC_OVERLOAD _CLC_DEF __CLC_INTN FUNCTION(__CLC_GENTYPE a) {
+  return __CLC_FUNCTION(FUNCTION)(a);
+}
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@@ -69,6 +69,7 @@ math/clc_half_sin.cl
 math/clc_half_sqrt.cl
 math/clc_half_tan.cl
 math/clc_hypot.cl
+math/clc_ilogb.cl
 math/clc_ldexp.cl
 math/clc_lgamma.cl
 math/clc_lgamma_r.cl
@@ -76,6 +77,7 @@ math/clc_log.cl
 math/clc_log10.cl
 math/clc_log1p.cl
 math/clc_log2.cl
+math/clc_logb.cl
 math/clc_mad.cl
 math/clc_maxmag.cl
 math/clc_minmag.cl
--- a/libclc/clc/lib/generic/math/clc_ilogb.cl
+++ b/libclc/clc/lib/generic/math/clc_ilogb.cl
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc_convert.h>
+#include <clc/clcmacro.h>
+#include <clc/float/definitions.h>
+#include <clc/integer/clc_clz.h>
+#include <clc/internal/clc.h>
+#include <clc/math/math.h>
+
+#define __CLC_BODY <clc_ilogb.inc>
+#include <clc/math/gentype.inc>
--- a/libclc/clc/lib/generic/math/clc_ilogb.inc
+++ b/libclc/clc/lib/generic/math/clc_ilogb.inc
@@ -0,0 +1,81 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_FPSIZE == 32
+// 32-bit float variant: computes the unbiased exponent of x from its bits.
+_CLC_OVERLOAD _CLC_DEF __CLC_INTN __clc_ilogb(__CLC_GENTYPE x) {
+  __CLC_UINTN ux = __CLC_AS_UINTN(x);
+  __CLC_UINTN ax = ux & EXSIGNBIT_SP32;
+  __CLC_INTN rs = (__CLC_INTN)LOG_MAGIC_NUM_SP32 -
+                  __CLC_AS_INTN(__clc_clz(ux & MANTBITS_SP32));
+  __CLC_INTN r = __CLC_AS_INTN(ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
+  r = ax < 0x00800000U ? rs : r; // exponent field is zero: use rs instead
+  r = ax == 0 ? FP_ILOGB0 : r;   // exact zero overrides the line above
+
+  // ax > EXPBITS_SP32 yields FP_ILOGBNAN; ax == EXPBITS_SP32 yields INT_MAX.
+  // The two tests below could be merged into:
+  //
+  //    r = ax >= EXPBITS_SP32 ? 0x7fffffff : r
+  // since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
+  // FP_ILOGBNAN can change without requiring changes to __clc_ilogb() code.
+  r = ax > EXPBITS_SP32 ? FP_ILOGBNAN : r;
+  r = ax == EXPBITS_SP32 ? 0x7fffffff : r;
+  return r;
+}
+
+#endif
+
+#if __CLC_FPSIZE == 64
+// 64-bit double variant: computes the unbiased exponent of x from its bits.
+_CLC_OVERLOAD _CLC_DEF __CLC_INTN __clc_ilogb(__CLC_GENTYPE x) {
+  __CLC_ULONGN ux = __CLC_AS_ULONGN(x);
+  __CLC_ULONGN ax = ux & ~SIGNBIT_DP64;
+  __CLC_INTN rs = (__CLC_INTN)LOG_MAGIC_NUM_DP64 -
+                  __CLC_CONVERT_INTN(__clc_clz(ax & MANTBITS_DP64));
+  __CLC_INTN r = __CLC_CONVERT_INTN(ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
+  r = __CLC_CONVERT_INTN(ax < 0x0010000000000000UL) ? rs : r; // exp == 0
+  r = __CLC_CONVERT_INTN(ax == 0UL) ? (__CLC_INTN)FP_ILOGB0 : r; // zero
+
+  // Above 0x7ff0000000000000 yields FP_ILOGBNAN; equal to it yields INT_MAX.
+  // The two tests below could be merged into:
+  //
+  //    r = ax >= 0x7ff0000000000000UL ? 0x7fffffff : r
+  // since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
+  // FP_ILOGBNAN can change without requiring changes to __clc_ilogb() code.
+  r = __CLC_CONVERT_INTN(ax > 0x7ff0000000000000UL) ? FP_ILOGBNAN : r;
+  r = __CLC_CONVERT_INTN(ax == 0x7ff0000000000000UL) ? 0x7fffffff : r;
+  return r;
+}
+
+#endif
+
+#if __CLC_FPSIZE == 16
+// 16-bit half variant: computes the unbiased exponent of x from its bits.
+_CLC_OVERLOAD _CLC_DEF __CLC_INTN __clc_ilogb(__CLC_GENTYPE x) {
+  __CLC_USHORTN ux = __CLC_AS_USHORTN(x);
+  __CLC_USHORTN ax = ux & (__CLC_USHORTN)EXSIGNBIT_FP16; // clear sign bit
+  __CLC_USHORTN mantx = ux & (__CLC_USHORTN)MANTBITS_FP16; // low 10 bits
+  __CLC_INTN rs =
+      (__CLC_INTN)LOG_MAGIC_NUM_FP16 - __CLC_CONVERT_INTN(__clc_clz(mantx));
+  __CLC_INTN r =
+      __CLC_CONVERT_INTN(ax >> (__CLC_USHORTN)EXPSHIFTBITS_FP16) - EXPBIAS_FP16;
+  r = __CLC_CONVERT_INTN(ax < (__CLC_USHORTN)0x0400U) ? rs : r; // exp == 0
+  r = __CLC_CONVERT_INTN(ax == (__CLC_USHORTN)0) ? (__CLC_INTN)FP_ILOGB0 : r;
+
+  // ax > EXPBITS_FP16 yields FP_ILOGBNAN; ax == EXPBITS_FP16 yields INT_MAX.
+  // The two tests below could be merged into:
+  //
+  //    r = ax >= EXPBITS_FP16 ? 0x7fffffff : r
+  // since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
+  // FP_ILOGBNAN can change without requiring changes to __clc_ilogb() code.
+  r = __CLC_CONVERT_INTN(ax > (__CLC_USHORTN)EXPBITS_FP16) ? FP_ILOGBNAN : r;
+  r = __CLC_CONVERT_INTN(ax == (__CLC_USHORTN)EXPBITS_FP16) ? 0x7fffffff : r;
+  return r;
+}
+
+#endif
--- a/libclc/clc/lib/generic/math/clc_logb.cl
+++ b/libclc/clc/lib/generic/math/clc_logb.cl
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc_convert.h>
+#include <clc/clcmacro.h>
+#include <clc/float/definitions.h>
+#include <clc/integer/clc_clz.h>
+#include <clc/internal/clc.h>
+#include <clc/math/math.h>
+
+#define __CLC_BODY <clc_logb.inc>
+#include <clc/math/gentype.inc>
--- a/libclc/clc/lib/generic/math/clc_logb.inc
+++ b/libclc/clc/lib/generic/math/clc_logb.inc
@@ -0,0 +1,54 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_FPSIZE == 32
+// 32-bit float variant: unbiased exponent of x, returned in x's own type.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_logb(__CLC_GENTYPE x) {
+  __CLC_INTN ax = __CLC_AS_INTN(x) & EXSIGNBIT_SP32;
+  __CLC_GENTYPE s = __CLC_CONVERT_GENTYPE(LOG_MAGIC_NUM_SP32 - __clc_clz(ax));
+  __CLC_GENTYPE r =
+      __CLC_CONVERT_GENTYPE((ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32);
+  r = ax >= PINFBITPATT_SP32 ? __CLC_AS_GENTYPE(ax) : r; // inf/NaN: keep ax
+  r = ax < 0x00800000 ? s : r; // exponent field is zero: use s instead
+  r = ax == 0 ? __CLC_AS_GENTYPE((__CLC_INTN)NINFBITPATT_SP32) : r; // zero
+  return r;
+}
+
+#endif
+
+#if __CLC_FPSIZE == 64
+// 64-bit double variant: unbiased exponent of x, returned in x's own type.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_logb(__CLC_GENTYPE x) {
+  __CLC_LONGN ax = __CLC_AS_LONGN(x) & EXSIGNBIT_DP64;
+  __CLC_GENTYPE s = __CLC_CONVERT_GENTYPE(LOG_MAGIC_NUM_DP64 - __clc_clz(ax));
+  __CLC_GENTYPE r =
+      __CLC_CONVERT_GENTYPE((ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
+  r = ax >= PINFBITPATT_DP64 ? __CLC_AS_GENTYPE(ax) : r; // inf/NaN: keep ax
+  r = ax < 0x0010000000000000L ? s : r; // exponent field is zero: use s
+  r = ax == 0L ? __CLC_AS_GENTYPE((__CLC_LONGN)NINFBITPATT_DP64) : r; // zero
+  return r;
+}
+
+#endif
+
+#if __CLC_FPSIZE == 16
+// 16-bit half variant: unbiased exponent of x, returned in x's own type.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_logb(__CLC_GENTYPE x) {
+  __CLC_SHORTN ax = __CLC_AS_SHORTN(x) & (__CLC_SHORTN)EXSIGNBIT_FP16;
+  __CLC_GENTYPE s = __CLC_CONVERT_GENTYPE((__CLC_SHORTN)LOG_MAGIC_NUM_FP16 -
+                                          (__CLC_SHORTN)__clc_clz(ax));
+  __CLC_GENTYPE r = __CLC_CONVERT_GENTYPE(
+      (ax >> (__CLC_SHORTN)EXPSHIFTBITS_FP16) - (__CLC_SHORTN)EXPBIAS_FP16);
+  r = ax >= (__CLC_SHORTN)PINFBITPATT_FP16 ? __CLC_AS_GENTYPE(ax) : r;
+  r = ax < (__CLC_SHORTN)0x0400 ? s : r; // exponent field is zero: use s
+  r = ax == (__CLC_SHORTN)0 ? __CLC_AS_GENTYPE((__CLC_SHORTN)NINFBITPATT_FP16)
+                            : r; // zero input selects NINFBITPATT_FP16
+  return r;
+}
+
+#endif
--- a/libclc/generic/include/clc/math/ilogb.h
+++ b/libclc/generic/include/clc/math/ilogb.h
@@ -6,6 +6,9 @@
 //
 //===----------------------------------------------------------------------===//

-#define __CLC_BODY <clc/math/ilogb.inc>
+#define __CLC_FUNCTION ilogb
+#define __CLC_BODY <clc/math/unary_decl_with_int_return.inc>

 #include <clc/math/gentype.inc>
+
+#undef __CLC_FUNCTION
--- a/libclc/generic/lib/math/ilogb.cl
+++ b/libclc/generic/lib/math/ilogb.cl
@@ -7,64 +7,8 @@
 //===----------------------------------------------------------------------===//

 #include <clc/clc.h>
-#include <clc/clcmacro.h>
-#include <clc/math/math.h>
+#include <clc/math/clc_ilogb.h>

-_CLC_OVERLOAD _CLC_DEF int ilogb(float x) {
-    uint ux = as_uint(x);
-    uint ax = ux & EXSIGNBIT_SP32;
-    int rs = -118 - (int) clz(ux & MANTBITS_SP32);
-    int r = (int) (ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
-    r = ax < 0x00800000U ? rs : r;
-    r = ax == 0 ? FP_ILOGB0 : r;
-
-    // We could merge those 2 tests and have:
-    //
-    //    r = ax >= EXPBITS_SP32 ? 0x7fffffff : r
-    //
-    // since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
-    // FP_ILOGBNAN can change without requiring changes to ilogb() code.
-    r = ax > EXPBITS_SP32 ? FP_ILOGBNAN : r;
-    r = ax == EXPBITS_SP32 ? 0x7fffffff : r;
-    return r;
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, float);
-
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_OVERLOAD _CLC_DEF int ilogb(double x) {
-    ulong ux = as_ulong(x);
-    ulong ax = ux & ~SIGNBIT_DP64;
-    int r = (int) (ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
-    int rs = -1011 - (int) clz(ax & MANTBITS_DP64);
-    r = ax < 0x0010000000000000UL ? rs : r;
-    r = ax == 0UL ? FP_ILOGB0 : r;
-
-    // We could merge those 2 tests and have:
-    //
-    //    r = ax >= 0x7ff0000000000000UL ? 0x7fffffff : r
-    //
-    // since FP_ILOGBNAN is set to INT_MAX, but it's clearer this way and
-    // FP_ILOGBNAN can change without requiring changes to ilogb() code.
-    r = ax > 0x7ff0000000000000UL ? FP_ILOGBNAN : r;
-    r = ax == 0x7ff0000000000000UL ? 0x7fffffff : r;
-    return r;
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, double);
-
-#endif // cl_khr_fp64
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_OVERLOAD _CLC_DEF int ilogb(half x) {
-    return ilogb((float)x);
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, half);
-
-#endif
+#define FUNCTION ilogb
+#define __CLC_BODY <clc/math/unary_def_with_int_return.inc>
+#include <clc/math/gentype.inc>
--- a/libclc/generic/lib/math/logb.cl
+++ b/libclc/generic/lib/math/logb.cl
@@ -7,41 +7,8 @@
 //===----------------------------------------------------------------------===//

 #include <clc/clc.h>
-#include <clc/clcmacro.h>
-#include <clc/math/math.h>
+#include <clc/math/clc_logb.h>

-_CLC_OVERLOAD _CLC_DEF float logb(float x) {
-    int ax = as_int(x) & EXSIGNBIT_SP32;
-    float s = -118 - clz(ax);
-    float r = (ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
-    r = ax >= PINFBITPATT_SP32 ? as_float(ax) : r;
-    r = ax < 0x00800000 ? s : r;
-    r = ax == 0 ? as_float(NINFBITPATT_SP32) : r;
-    return r;
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, logb, float);
-
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_OVERLOAD _CLC_DEF double logb(double x) {
-    long ax = as_long(x) & EXSIGNBIT_DP64;
-    double s = -1011L - clz(ax);
-    double r = (int) (ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
-    r = ax >= PINFBITPATT_DP64 ? as_double(ax) : r;
-    r = ax < 0x0010000000000000L ? s : r;
-    r = ax == 0L ? as_double(NINFBITPATT_DP64) : r;
-    return r;
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, logb, double)
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEFINE_UNARY_BUILTIN_FP16(logb)
-
-#endif
+#define FUNCTION logb
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include <clc/math/gentype.inc>