[libclc] Move min/max/clamp into the CLC builtins library (#114386)

These functions are "shared" between integer and floating-point types,
hence the directory name. They are used in several CLC internal
functions such as __clc_ldexp.

Note that clspv and spirv targets don't want to define these functions,
so preprocessor macros replace calls to __clc_min with regular min, for
example. This means they can use as much of the generic CLC source files
as possible, but where CLC functions would usually call out to an
external __clc_min symbol, they call out to an external min symbol. Then
they opt out of defining __clc_min itself in their CLC builtins library.

Preprocessor definitions for these targets have also been changed
somewhat: what used to be CLC_SPIRV (the 32-bit target) is now
CLC_SPIRV32, and CLC_SPIRV now represents either CLC_SPIRV32 or
CLC_SPIRV64. Same goes for CLC_CLSPV.

There are no differences (measured with llvm-diff) in any of the final
builtins libraries for nvptx, amdgpu, or clspv. Neither are there
differences in the SPIR-V targets' LLVM IR before it's actually lowered
to SPIR-V.
This commit is contained in:
Fraser Cormack
2024-10-31 16:45:37 +00:00
committed by GitHub
parent 9fb4bc5bf4
commit d12a8da1de
29 changed files with 164 additions and 23 deletions

View File

@@ -321,21 +321,30 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
message( STATUS " device: ${d} ( ${${d}_aliases} )" )
if ( ARCH STREQUAL spirv OR ARCH STREQUAL spirv64 )
set( build_flags -O0 -finline-hint-functions )
set( build_flags -O0 -finline-hint-functions -DCLC_SPIRV )
set( opt_flags )
set( spvflags --spirv-max-version=1.1 )
set( MACRO_ARCH SPIRV32 )
if( ARCH STREQUAL spirv64 )
set( MACRO_ARCH SPIRV64 )
endif()
elseif( ARCH STREQUAL clspv OR ARCH STREQUAL clspv64 )
set( build_flags "-Wno-unknown-assumption")
set( build_flags "-Wno-unknown-assumption" -DCLC_CLSPV )
set( opt_flags -O3 )
set( MACRO_ARCH CLSPV32 )
if( ARCH STREQUAL clspv64 )
set( MACRO_ARCH CLSPV64 )
endif()
else()
set( build_flags )
set( opt_flags -O3 )
set( MACRO_ARCH ${ARCH} )
endif()
set( LIBCLC_ARCH_OBJFILE_DIR "${LIBCLC_OBJFILE_DIR}/${arch_suffix}" )
file( MAKE_DIRECTORY ${LIBCLC_ARCH_OBJFILE_DIR} )
string( TOUPPER "CLC_${ARCH}" CLC_TARGET_DEFINE )
string( TOUPPER "CLC_${MACRO_ARCH}" CLC_TARGET_DEFINE )
list( APPEND build_flags
-D__CLC_INTERNAL

View File

@@ -7,9 +7,9 @@
// avoid inlines for SPIR-V related targets since we'll optimise later in the
// chain
#if defined(CLC_SPIRV) || defined(CLC_SPIRV64)
#if defined(CLC_SPIRV)
#define _CLC_DEF
#elif defined(CLC_CLSPV) || defined(CLC_CLSPV64)
#elif defined(CLC_CLSPV)
#define _CLC_DEF __attribute__((noinline)) __attribute__((clspv_libclc_builtin))
#else
#define _CLC_DEF __attribute__((always_inline))

View File

@@ -1,5 +1,5 @@
//These 2 defines only change when switching between data sizes or base types to
//keep this file manageable.
// These 2 defines only change when switching between data sizes or base types
// to keep this file manageable.
#define __CLC_GENSIZE 8
#define __CLC_SCALAR_GENTYPE char

View File

@@ -0,0 +1,15 @@
// Makes __clc_clamp available to CLC-internal code: either as a macro alias
// for the target's own clamp, or as a set of overload declarations.
#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
// clspv and spir-v targets provide their own OpenCL-compatible clamp
#define __clc_clamp clamp
#else
// Presumably these supply the _CLC_OVERLOAD/_CLC_DECL macros and the CLC
// types used by the declarations below -- TODO confirm.
#include <clc/clcfunc.h>
#include <clc/clctypes.h>
// First pass: expand clc_clamp.inc through the integer gentype machinery.
#define __CLC_BODY <clc/shared/clc_clamp.inc>
#include <clc/integer/gentype.inc>
// Second pass: the same body through the math (floating-point) gentype
// machinery. __CLC_BODY is defined again here; presumably gentype.inc
// undefines it once it has been consumed -- TODO confirm.
#define __CLC_BODY <clc/shared/clc_clamp.inc>
#include <clc/math/gentype.inc>
#endif

View File

@@ -0,0 +1,9 @@
// Declarations of __clc_clamp for the current __CLC_GENTYPE. This file does
// not define __CLC_GENTYPE itself, so it must be included from a context that
// does (it is meant to be used as a __CLC_BODY).
//
// Overload taking all three operands as __CLC_GENTYPE.
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x,
__CLC_GENTYPE y,
__CLC_GENTYPE z);
// When __CLC_SCALAR is not defined (presumably the vector gentypes), also
// declare an overload whose two bounds are of the scalar element type.
#ifndef __CLC_SCALAR
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x,
__CLC_SCALAR_GENTYPE y,
__CLC_SCALAR_GENTYPE z);
#endif

View File

@@ -0,0 +1,12 @@
// Makes __clc_max available to CLC-internal code: either as a macro alias for
// the target's own max, or as a set of overload declarations.
#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
// clspv and spir-v targets provide their own OpenCL-compatible max
#define __clc_max max
#else
// NOTE(review): unlike the clamp header, this one does not include
// <clc/clcfunc.h> or <clc/clctypes.h>; it presumably relies on the includer
// having pulled them in already -- confirm.
//
// First pass: expand clc_max.inc through the integer gentype machinery.
#define __CLC_BODY <clc/shared/clc_max.inc>
#include <clc/integer/gentype.inc>
// Second pass: the same body through the math (floating-point) gentype
// machinery. __CLC_BODY is defined again here; presumably gentype.inc
// undefines it once it has been consumed -- TODO confirm.
#define __CLC_BODY <clc/shared/clc_max.inc>
#include <clc/math/gentype.inc>
#endif

View File

@@ -0,0 +1,7 @@
// Declarations of __clc_max for the current __CLC_GENTYPE. This file does not
// define __CLC_GENTYPE itself, so it must be included from a context that
// does (it is meant to be used as a __CLC_BODY).
//
// Overload taking both operands as __CLC_GENTYPE.
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_max(__CLC_GENTYPE a,
__CLC_GENTYPE b);
// When __CLC_SCALAR is not defined (presumably the vector gentypes), also
// declare an overload whose second operand is of the scalar element type.
#ifndef __CLC_SCALAR
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_max(__CLC_GENTYPE a,
__CLC_SCALAR_GENTYPE b);
#endif

View File

@@ -0,0 +1,12 @@
// Makes __clc_min available to CLC-internal code: either as a macro alias for
// the target's own min, or as a set of overload declarations.
#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
// clspv and spir-v targets provide their own OpenCL-compatible min
#define __clc_min min
#else
// NOTE(review): unlike the clamp header, this one does not include
// <clc/clcfunc.h> or <clc/clctypes.h>; it presumably relies on the includer
// having pulled them in already -- confirm.
//
// First pass: expand clc_min.inc through the integer gentype machinery.
#define __CLC_BODY <clc/shared/clc_min.inc>
#include <clc/integer/gentype.inc>
// Second pass: the same body through the math (floating-point) gentype
// machinery. __CLC_BODY is defined again here; presumably gentype.inc
// undefines it once it has been consumed -- TODO confirm.
#define __CLC_BODY <clc/shared/clc_min.inc>
#include <clc/math/gentype.inc>
#endif

View File

@@ -0,0 +1,7 @@
// Declarations of __clc_min for the current __CLC_GENTYPE. This file does not
// define __CLC_GENTYPE itself, so it must be included from a context that
// does (it is meant to be used as a __CLC_BODY).
//
// Overload taking both operands as __CLC_GENTYPE.
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_min(__CLC_GENTYPE a,
__CLC_GENTYPE b);
// When __CLC_SCALAR is not defined (presumably the vector gentypes), also
// declare an overload whose second operand is of the scalar element type.
#ifndef __CLC_SCALAR
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_min(__CLC_GENTYPE a,
__CLC_SCALAR_GENTYPE b);
#endif

View File

@@ -1 +1,4 @@
geometric/clc_dot.cl
shared/clc_clamp.cl
shared/clc_max.cl
shared/clc_min.cl

View File

@@ -0,0 +1,7 @@
// Translation unit that provides the __clc_clamp definitions by expanding
// clc_clamp.inc through both gentype include files.
#include <clc/internal/clc.h>
// Integer pass.
#define __CLC_BODY <clc_clamp.inc>
#include <clc/integer/gentype.inc>
// Math (floating-point) pass. __CLC_BODY is defined again here; presumably
// gentype.inc undefines it once it has been consumed -- TODO confirm.
#define __CLC_BODY <clc_clamp.inc>
#include <clc/math/gentype.inc>

View File

@@ -0,0 +1,14 @@
// Definitions of __clc_clamp for the current __CLC_GENTYPE; meant to be
// included as a __CLC_BODY, since __CLC_GENTYPE is not defined in this file.
//
// Returns z where x > z, otherwise y where x < y, otherwise x. The upper
// bound is tested first, so z is the result whenever x > z, even if x < y
// also holds (which can only happen when y > z).
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x,
__CLC_GENTYPE y,
__CLC_GENTYPE z) {
return (x > z ? z : (x < y ? y : x));
}
// When __CLC_SCALAR is not defined (presumably the vector gentypes), also
// provide an overload with scalar bounds. Both bounds are cast to
// __CLC_GENTYPE before each comparison and selection; the logic is otherwise
// the same as above.
#ifndef __CLC_SCALAR
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x,
__CLC_SCALAR_GENTYPE y,
__CLC_SCALAR_GENTYPE z) {
return (x > (__CLC_GENTYPE)z ? (__CLC_GENTYPE)z
: (x < (__CLC_GENTYPE)y ? (__CLC_GENTYPE)y : x));
}
#endif

View File

@@ -0,0 +1,7 @@
// Translation unit that provides the __clc_max definitions by expanding
// clc_max.inc through both gentype include files.
#include <clc/internal/clc.h>
// Integer pass.
#define __CLC_BODY <clc_max.inc>
#include <clc/integer/gentype.inc>
// Math (floating-point) pass. __CLC_BODY is defined again here; presumably
// gentype.inc undefines it once it has been consumed -- TODO confirm.
#define __CLC_BODY <clc_max.inc>
#include <clc/math/gentype.inc>

View File

@@ -0,0 +1,11 @@
// Definitions of __clc_max for the current __CLC_GENTYPE; meant to be
// included as a __CLC_BODY, since __CLC_GENTYPE is not defined in this file.
//
// Returns a where a > b, otherwise b (so b is the result when the operands
// compare equal).
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_max(__CLC_GENTYPE a,
__CLC_GENTYPE b) {
return (a > b ? a : b);
}
// When __CLC_SCALAR is not defined (presumably the vector gentypes), also
// provide an overload with a scalar second operand, which is cast to
// __CLC_GENTYPE for both the comparison and the selected value.
#ifndef __CLC_SCALAR
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_max(__CLC_GENTYPE a,
__CLC_SCALAR_GENTYPE b) {
return (a > (__CLC_GENTYPE)b ? a : (__CLC_GENTYPE)b);
}
#endif

View File

@@ -0,0 +1,7 @@
// Translation unit that provides the __clc_min definitions by expanding
// clc_min.inc through both gentype include files.
#include <clc/internal/clc.h>
// Integer pass.
#define __CLC_BODY <clc_min.inc>
#include <clc/integer/gentype.inc>
// Math (floating-point) pass. __CLC_BODY is defined again here; presumably
// gentype.inc undefines it once it has been consumed -- TODO confirm.
#define __CLC_BODY <clc_min.inc>
#include <clc/math/gentype.inc>

View File

@@ -0,0 +1,11 @@
// Definitions of __clc_min for the current __CLC_GENTYPE; meant to be
// included as a __CLC_BODY, since __CLC_GENTYPE is not defined in this file.
//
// Returns b where b < a, otherwise a (so a is the result when the operands
// compare equal).
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_min(__CLC_GENTYPE a,
__CLC_GENTYPE b) {
return (b < a ? b : a);
}
// When __CLC_SCALAR is not defined (presumably the vector gentypes), also
// provide an overload with a scalar second operand.
// NOTE(review): in the comparison the cast is applied to `a`, which already
// has type __CLC_GENTYPE, while the scalar `b` is left uncast; only the
// selected value casts `b`. This presumably relies on implicit scalar
// widening in the comparison -- confirm this is intended.
#ifndef __CLC_SCALAR
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_min(__CLC_GENTYPE a,
__CLC_SCALAR_GENTYPE b) {
return (b < (__CLC_GENTYPE)a ? (__CLC_GENTYPE)b : a);
}
#endif

View File

@@ -20,6 +20,8 @@
* THE SOFTWARE.
*/
#include <clc/clcfunc.h>
_CLC_DECL bool __clc_subnormals_disabled();
_CLC_DECL bool __clc_fp16_subnormals_supported();
_CLC_DECL bool __clc_fp32_subnormals_supported();

View File

@@ -46,7 +46,7 @@ SMOOTH_STEP_DEF(double, double, SMOOTH_STEP_IMPL_D);
_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, smoothstep, double, double, double);
#if !defined(CLC_SPIRV) && !defined(CLC_SPIRV64)
#if !defined(CLC_SPIRV)
SMOOTH_STEP_DEF(float, double, SMOOTH_STEP_IMPL_D);
SMOOTH_STEP_DEF(double, float, SMOOTH_STEP_IMPL_D);

View File

@@ -45,7 +45,7 @@ STEP_DEF(double, double);
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, step, double, double);
_CLC_V_S_V_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, step, double, double);
#if !defined(CLC_SPIRV) && !defined(CLC_SPIRV64)
#if !defined(CLC_SPIRV)
STEP_DEF(float, double);
STEP_DEF(double, float);

View File

@@ -21,6 +21,7 @@
*/
#include <clc/clc.h>
#include <clc/shared/clc_clamp.h>
#include <math/clc_hypot.h>
#include "config.h"
@@ -39,7 +40,8 @@ _CLC_DEF _CLC_OVERLOAD float __clc_hypot(float x, float y) {
ux = c ? aux : auy;
uy = c ? auy : aux;
int xexp = clamp((int)(ux >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32, -126, 126);
int xexp =
__clc_clamp((int)(ux >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32, -126, 126);
float fx_exp = as_float((xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32);
float fi_exp = as_float((-xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32);
float fx = as_float(ux) * fi_exp;

View File

@@ -20,10 +20,11 @@
* THE SOFTWARE.
*/
#include <clc/clc.h>
#include "config.h"
#include "../clcmacro.h"
#include "config.h"
#include "math.h"
#include <clc/clc.h>
#include <clc/shared/clc_clamp.h>
_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float x, int n) {
@@ -35,7 +36,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float x, int n) {
int m = i & 0x007fffff;
int s = i & 0x80000000;
int v = add_sat(e, n);
v = clamp(v, 0, 0xff);
v = __clc_clamp(v, 0, 0xff);
int mr = e == 0 | v == 0 | v == 0xff ? 0 : m;
int c = e == 0xff;
mr = c ? m : mr;
@@ -110,7 +111,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double x, int n) {
ux = c ? ux : l;
int v = e + n;
v = clamp(v, -0x7ff, 0x7ff);
v = __clc_clamp(v, -0x7ff, 0x7ff);
ux &= ~EXPBITS_DP64;

View File

@@ -40,7 +40,7 @@
#if (defined __AMDGCN__ || defined __R600__) && !defined __HAS_FMAF__
#define HAVE_HW_FMA32() (0)
#elif defined CLC_SPIRV || defined CLC_SPIRV64
#elif defined(CLC_SPIRV)
bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void);
#define HAVE_HW_FMA32() __clc_runtime_has_hw_fma32()
#else

View File

@@ -1,4 +1,5 @@
#include <clc/clc.h>
#include <clc/shared/clc_clamp.h>
#define __CLC_BODY <clamp.inc>
#include <clc/integer/gentype.inc>

View File

@@ -1,9 +1,9 @@
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z) {
return (x > z ? z : (x < y ? y : x));
return __clc_clamp(x, y, z);
}
#ifndef __CLC_SCALAR
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_SCALAR_GENTYPE y, __CLC_SCALAR_GENTYPE z) {
return (x > (__CLC_GENTYPE)z ? (__CLC_GENTYPE)z : (x < (__CLC_GENTYPE)y ? (__CLC_GENTYPE)y : x));
return __clc_clamp(x, y, z);
}
#endif

View File

@@ -1,4 +1,5 @@
#include <clc/clc.h>
#include <clc/shared/clc_max.h>
#define __CLC_BODY <max.inc>
#include <clc/integer/gentype.inc>

View File

@@ -1,9 +1,10 @@
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_GENTYPE b) {
return (a > b ? a : b);
return __clc_max(a, b);
}
#ifndef __CLC_SCALAR
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) {
return (a > (__CLC_GENTYPE)b ? a : (__CLC_GENTYPE)b);
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a,
__CLC_SCALAR_GENTYPE b) {
return __clc_max(a, b);
}
#endif

View File

@@ -1,4 +1,5 @@
#include <clc/clc.h>
#include <clc/shared/clc_min.h>
#define __CLC_BODY <min.inc>
#include <clc/integer/gentype.inc>

View File

@@ -1,9 +1,10 @@
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_GENTYPE b) {
return (b < a ? b : a);
return __clc_min(a, b);
}
#ifndef __CLC_SCALAR
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) {
return (b < (__CLC_GENTYPE)a ? (__CLC_GENTYPE)b : a);
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a,
__CLC_SCALAR_GENTYPE b) {
return __clc_min(a, b);
}
#endif