[libclc] Move conversion builtins to the CLC library (#124727)
This commit moves the implementations of the conversion builtins to the CLC library. It keeps the dichotomy of regular vs. clspv implementations of the conversions. However, for the sake of a consistent interface, all CLC conversion routines are built, even the ones that clspv opts out of in the user-facing OpenCL layer. It simultaneously updates the Python script to use f-strings for formatting.
This commit is contained in:
@@ -247,11 +247,27 @@ add_custom_target( "generate_convert.cl" DEPENDS convert.cl )
|
||||
set_target_properties( "generate_convert.cl" PROPERTIES FOLDER "libclc/Sourcegenning" )
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT clspv-convert.cl
|
||||
COMMAND ${Python3_EXECUTABLE} ${script_loc} --clspv > clspv-convert.cl
|
||||
OUTPUT clc-convert.cl
|
||||
COMMAND ${Python3_EXECUTABLE} ${script_loc} --clc > clc-convert.cl
|
||||
DEPENDS ${script_loc} )
|
||||
add_custom_target( "clspv-generate_convert.cl" DEPENDS clspv-convert.cl )
|
||||
set_target_properties( "clspv-generate_convert.cl" PROPERTIES FOLDER "libclc/Sourcegenning" )
|
||||
add_custom_target( "clc-generate_convert.cl" DEPENDS clc-convert.cl )
|
||||
set_target_properties( "clc-generate_convert.cl" PROPERTIES FOLDER "libclc/Sourcegenning" )
|
||||
|
||||
if ( clspv-- IN_LIST LIBCLC_TARGETS_TO_BUILD OR clspv64-- IN_LIST LIBCLC_TARGETS_TO_BUILD )
|
||||
add_custom_command(
|
||||
OUTPUT clspv-convert.cl
|
||||
COMMAND ${Python3_EXECUTABLE} ${script_loc} --clspv > clspv-convert.cl
|
||||
DEPENDS ${script_loc} )
|
||||
add_custom_target( "clspv-generate_convert.cl" DEPENDS clspv-convert.cl )
|
||||
set_target_properties( "clspv-generate_convert.cl" PROPERTIES FOLDER "libclc/Sourcegenning" )
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT clc-clspv-convert.cl
|
||||
COMMAND ${Python3_EXECUTABLE} ${script_loc} --clc --clspv > clc-clspv-convert.cl
|
||||
DEPENDS ${script_loc} )
|
||||
add_custom_target( "clc-clspv-generate_convert.cl" DEPENDS clc-clspv-convert.cl )
|
||||
set_target_properties( "clc-clspv-generate_convert.cl" PROPERTIES FOLDER "libclc/Sourcegenning" )
|
||||
endif()
|
||||
|
||||
enable_testing()
|
||||
|
||||
@@ -289,6 +305,12 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
|
||||
set( clc_lib_files )
|
||||
set( clc_dirs ${dirs} generic )
|
||||
|
||||
if( ARCH STREQUAL clspv OR ARCH STREQUAL clspv64 )
|
||||
set( clc_gen_files clc-clspv-convert.cl )
|
||||
else()
|
||||
set( clc_gen_files clc-convert.cl )
|
||||
endif()
|
||||
|
||||
libclc_configure_lib_source(
|
||||
clc_lib_files
|
||||
CLC_INTERNAL
|
||||
@@ -372,6 +394,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
|
||||
COMPILE_FLAGS ${clc_build_flags}
|
||||
OPT_FLAGS ${opt_flags}
|
||||
LIB_FILES ${clc_lib_files}
|
||||
GEN_FILES ${clc_gen_files}
|
||||
)
|
||||
|
||||
list( APPEND build_flags
|
||||
|
||||
98
libclc/clc/include/clc/clc_convert.h
Normal file
98
libclc/clc/include/clc/clc_convert.h
Normal file
@@ -0,0 +1,98 @@
|
||||
#ifndef __CLC_CLC_CONVERT_H__
|
||||
#define __CLC_CLC_CONVERT_H__
|
||||
|
||||
#define _CLC_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
|
||||
_CLC_OVERLOAD _CLC_DECL TO_TYPE __clc_convert_##TO_TYPE##SUFFIX(FROM_TYPE x);
|
||||
|
||||
#define _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
|
||||
_CLC_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
|
||||
_CLC_CONVERT_DECL(FROM_TYPE##2, TO_TYPE##2, SUFFIX) \
|
||||
_CLC_CONVERT_DECL(FROM_TYPE##3, TO_TYPE##3, SUFFIX) \
|
||||
_CLC_CONVERT_DECL(FROM_TYPE##4, TO_TYPE##4, SUFFIX) \
|
||||
_CLC_CONVERT_DECL(FROM_TYPE##8, TO_TYPE##8, SUFFIX) \
|
||||
_CLC_CONVERT_DECL(FROM_TYPE##16, TO_TYPE##16, SUFFIX)
|
||||
|
||||
#define _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, char, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, uchar, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, int, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, uint, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, short, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, ushort, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, long, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, ulong, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, float, SUFFIX)
|
||||
|
||||
#if defined(cl_khr_fp64) && defined(cl_khr_fp16)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX)
|
||||
#elif defined(cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX)
|
||||
#elif defined(cl_khr_fp16)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX)
|
||||
#else
|
||||
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX)
|
||||
#endif
|
||||
|
||||
#define _CLC_VECTOR_CONVERT_TO1(SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(char, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(uchar, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(int, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(uint, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(short, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(ushort, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(long, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(ulong, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(float, SUFFIX)
|
||||
|
||||
#if defined(cl_khr_fp64) && defined(cl_khr_fp16)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_TO1(SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(double, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(half, SUFFIX)
|
||||
#elif defined(cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_TO1(SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(double, SUFFIX)
|
||||
#elif defined(cl_khr_fp16)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_TO1(SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(half, SUFFIX)
|
||||
#else
|
||||
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) _CLC_VECTOR_CONVERT_TO1(SUFFIX)
|
||||
#endif
|
||||
|
||||
#define _CLC_VECTOR_CONVERT_TO_SUFFIX(ROUND) \
|
||||
_CLC_VECTOR_CONVERT_TO(_sat##ROUND) \
|
||||
_CLC_VECTOR_CONVERT_TO(ROUND)
|
||||
|
||||
_CLC_VECTOR_CONVERT_TO_SUFFIX(_rtn)
|
||||
_CLC_VECTOR_CONVERT_TO_SUFFIX(_rte)
|
||||
_CLC_VECTOR_CONVERT_TO_SUFFIX(_rtz)
|
||||
_CLC_VECTOR_CONVERT_TO_SUFFIX(_rtp)
|
||||
_CLC_VECTOR_CONVERT_TO_SUFFIX()
|
||||
|
||||
#undef _CLC_VECTOR_CONVERT_TO_SUFFIX
|
||||
#undef _CLC_VECTOR_CONVERT_TO
|
||||
#undef _CLC_VECTOR_CONVERT_TO1
|
||||
#undef _CLC_VECTOR_CONVERT_FROM
|
||||
#undef _CLC_VECTOR_CONVERT_FROM1
|
||||
#undef _CLC_VECTOR_CONVERT_DECL
|
||||
#undef _CLC_CONVERT_DECL
|
||||
|
||||
#endif // __CLC_CLC_CONVERT_H__
|
||||
88
libclc/clc/include/clc/float/definitions.h
Normal file
88
libclc/clc/include/clc/float/definitions.h
Normal file
@@ -0,0 +1,88 @@
|
||||
#define MAXFLOAT 0x1.fffffep127f
|
||||
#define HUGE_VALF __builtin_huge_valf()
|
||||
#define INFINITY __builtin_inff()
|
||||
#define NAN __builtin_nanf("")
|
||||
|
||||
#define FLT_DIG 6
|
||||
#define FLT_MANT_DIG 24
|
||||
#define FLT_MAX_10_EXP +38
|
||||
#define FLT_MAX_EXP +128
|
||||
#define FLT_MIN_10_EXP -37
|
||||
#define FLT_MIN_EXP -125
|
||||
#define FLT_RADIX 2
|
||||
#define FLT_MAX MAXFLOAT
|
||||
#define FLT_MIN 0x1.0p-126f
|
||||
#define FLT_EPSILON 0x1.0p-23f
|
||||
|
||||
#define FP_ILOGB0 (-2147483647 - 1)
|
||||
#define FP_ILOGBNAN 2147483647
|
||||
|
||||
#define M_E_F 0x1.5bf0a8p+1f
|
||||
#define M_LOG2E_F 0x1.715476p+0f
|
||||
#define M_LOG10E_F 0x1.bcb7b2p-2f
|
||||
#define M_LN2_F 0x1.62e430p-1f
|
||||
#define M_LN10_F 0x1.26bb1cp+1f
|
||||
#define M_PI_F 0x1.921fb6p+1f
|
||||
#define M_PI_2_F 0x1.921fb6p+0f
|
||||
#define M_PI_4_F 0x1.921fb6p-1f
|
||||
#define M_1_PI_F 0x1.45f306p-2f
|
||||
#define M_2_PI_F 0x1.45f306p-1f
|
||||
#define M_2_SQRTPI_F 0x1.20dd76p+0f
|
||||
#define M_SQRT2_F 0x1.6a09e6p+0f
|
||||
#define M_SQRT1_2_F 0x1.6a09e6p-1f
|
||||
|
||||
#define M_LOG210_F 0x1.a934f0p+1f
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#define HUGE_VAL __builtin_huge_val()
|
||||
|
||||
#define DBL_DIG 15
|
||||
#define DBL_MANT_DIG 53
|
||||
#define DBL_MAX_10_EXP +308
|
||||
#define DBL_MAX_EXP +1024
|
||||
#define DBL_MIN_10_EXP -307
|
||||
#define DBL_MIN_EXP -1021
|
||||
#define DBL_MAX 0x1.fffffffffffffp1023
|
||||
#define DBL_MIN 0x1.0p-1022
|
||||
#define DBL_EPSILON 0x1.0p-52
|
||||
|
||||
#define M_E 0x1.5bf0a8b145769p+1
|
||||
#define M_LOG2E 0x1.71547652b82fep+0
|
||||
#define M_LOG10E 0x1.bcb7b1526e50ep-2
|
||||
#define M_LN2 0x1.62e42fefa39efp-1
|
||||
#define M_LN10 0x1.26bb1bbb55516p+1
|
||||
#define M_PI 0x1.921fb54442d18p+1
|
||||
#define M_PI_2 0x1.921fb54442d18p+0
|
||||
#define M_PI_4 0x1.921fb54442d18p-1
|
||||
#define M_1_PI 0x1.45f306dc9c883p-2
|
||||
#define M_2_PI 0x1.45f306dc9c883p-1
|
||||
#define M_2_SQRTPI 0x1.20dd750429b6dp+0
|
||||
#define M_SQRT2 0x1.6a09e667f3bcdp+0
|
||||
#define M_SQRT1_2 0x1.6a09e667f3bcdp-1
|
||||
|
||||
#ifdef __CLC_INTERNAL
|
||||
#define M_LOG210 0x1.a934f0979a371p+1
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#if __OPENCL_VERSION__ >= 120
|
||||
|
||||
#define HALF_DIG 3
|
||||
#define HALF_MANT_DIG 11
|
||||
#define HALF_MAX_10_EXP +4
|
||||
#define HALF_MAX_EXP +16
|
||||
#define HALF_MIN_10_EXP -4
|
||||
#define HALF_MIN_EXP -13
|
||||
|
||||
#define HALF_RADIX 2
|
||||
#define HALF_MAX 0x1.ffcp15h
|
||||
#define HALF_MIN 0x1.0p-14h
|
||||
#define HALF_EPSILON 0x1.0p-10h
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,74 +1,81 @@
|
||||
#define _CLC_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
|
||||
#define _CLC_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
|
||||
_CLC_OVERLOAD _CLC_DECL TO_TYPE convert_##TO_TYPE##SUFFIX(FROM_TYPE x);
|
||||
|
||||
#define _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
|
||||
_CLC_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
|
||||
_CLC_CONVERT_DECL(FROM_TYPE##2, TO_TYPE##2, SUFFIX) \
|
||||
_CLC_CONVERT_DECL(FROM_TYPE##3, TO_TYPE##3, SUFFIX) \
|
||||
_CLC_CONVERT_DECL(FROM_TYPE##4, TO_TYPE##4, SUFFIX) \
|
||||
_CLC_CONVERT_DECL(FROM_TYPE##8, TO_TYPE##8, SUFFIX) \
|
||||
#define _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
|
||||
_CLC_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
|
||||
_CLC_CONVERT_DECL(FROM_TYPE##2, TO_TYPE##2, SUFFIX) \
|
||||
_CLC_CONVERT_DECL(FROM_TYPE##3, TO_TYPE##3, SUFFIX) \
|
||||
_CLC_CONVERT_DECL(FROM_TYPE##4, TO_TYPE##4, SUFFIX) \
|
||||
_CLC_CONVERT_DECL(FROM_TYPE##8, TO_TYPE##8, SUFFIX) \
|
||||
_CLC_CONVERT_DECL(FROM_TYPE##16, TO_TYPE##16, SUFFIX)
|
||||
|
||||
#define _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, char, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, uchar, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, int, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, uint, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, short, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, ushort, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, long, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, ulong, SUFFIX) \
|
||||
#define _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, char, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, uchar, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, int, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, uint, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, short, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, ushort, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, long, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, ulong, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, float, SUFFIX)
|
||||
|
||||
#if defined(cl_khr_fp64) && defined(cl_khr_fp16)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX)
|
||||
#elif defined(cl_khr_fp64)
|
||||
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX)
|
||||
#elif defined(cl_khr_fp16)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX)
|
||||
#else
|
||||
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
|
||||
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX)
|
||||
#endif
|
||||
|
||||
#define _CLC_VECTOR_CONVERT_TO1(SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(char, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(uchar, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(int, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(uint, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(short, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(ushort, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(long, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(ulong, SUFFIX) \
|
||||
#define _CLC_VECTOR_CONVERT_TO1(SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(char, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(uchar, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(int, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(uint, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(short, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(ushort, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(long, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(ulong, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(float, SUFFIX)
|
||||
|
||||
#if defined(cl_khr_fp64) && defined(cl_khr_fp16)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_TO1(SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(double, SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(half, SUFFIX)
|
||||
#elif defined(cl_khr_fp64)
|
||||
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_TO1(SUFFIX) \
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_TO1(SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(double, SUFFIX)
|
||||
#elif defined(cl_khr_fp16)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_TO1(SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_FROM(half, SUFFIX)
|
||||
#else
|
||||
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
|
||||
_CLC_VECTOR_CONVERT_TO1(SUFFIX)
|
||||
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) _CLC_VECTOR_CONVERT_TO1(SUFFIX)
|
||||
#endif
|
||||
|
||||
#define _CLC_VECTOR_CONVERT_TO_SUFFIX(ROUND) \
|
||||
_CLC_VECTOR_CONVERT_TO(_sat##ROUND) \
|
||||
#define _CLC_VECTOR_CONVERT_TO_SUFFIX(ROUND) \
|
||||
_CLC_VECTOR_CONVERT_TO(_sat##ROUND) \
|
||||
_CLC_VECTOR_CONVERT_TO(ROUND)
|
||||
|
||||
_CLC_VECTOR_CONVERT_TO_SUFFIX(_rtn)
|
||||
@@ -76,3 +83,11 @@ _CLC_VECTOR_CONVERT_TO_SUFFIX(_rte)
|
||||
_CLC_VECTOR_CONVERT_TO_SUFFIX(_rtz)
|
||||
_CLC_VECTOR_CONVERT_TO_SUFFIX(_rtp)
|
||||
_CLC_VECTOR_CONVERT_TO_SUFFIX()
|
||||
|
||||
#undef _CLC_VECTOR_CONVERT_TO_SUFFIX
|
||||
#undef _CLC_VECTOR_CONVERT_TO
|
||||
#undef _CLC_VECTOR_CONVERT_TO1
|
||||
#undef _CLC_VECTOR_CONVERT_FROM
|
||||
#undef _CLC_VECTOR_CONVERT_FROM1
|
||||
#undef _CLC_VECTOR_CONVERT_DECL
|
||||
#undef _CLC_CONVERT_DECL
|
||||
|
||||
@@ -1,88 +0,0 @@
|
||||
#define MAXFLOAT 0x1.fffffep127f
|
||||
#define HUGE_VALF __builtin_huge_valf()
|
||||
#define INFINITY __builtin_inff()
|
||||
#define NAN __builtin_nanf("")
|
||||
|
||||
#define FLT_DIG 6
|
||||
#define FLT_MANT_DIG 24
|
||||
#define FLT_MAX_10_EXP +38
|
||||
#define FLT_MAX_EXP +128
|
||||
#define FLT_MIN_10_EXP -37
|
||||
#define FLT_MIN_EXP -125
|
||||
#define FLT_RADIX 2
|
||||
#define FLT_MAX MAXFLOAT
|
||||
#define FLT_MIN 0x1.0p-126f
|
||||
#define FLT_EPSILON 0x1.0p-23f
|
||||
|
||||
#define FP_ILOGB0 (-2147483647 - 1)
|
||||
#define FP_ILOGBNAN 2147483647
|
||||
|
||||
#define M_E_F 0x1.5bf0a8p+1f
|
||||
#define M_LOG2E_F 0x1.715476p+0f
|
||||
#define M_LOG10E_F 0x1.bcb7b2p-2f
|
||||
#define M_LN2_F 0x1.62e430p-1f
|
||||
#define M_LN10_F 0x1.26bb1cp+1f
|
||||
#define M_PI_F 0x1.921fb6p+1f
|
||||
#define M_PI_2_F 0x1.921fb6p+0f
|
||||
#define M_PI_4_F 0x1.921fb6p-1f
|
||||
#define M_1_PI_F 0x1.45f306p-2f
|
||||
#define M_2_PI_F 0x1.45f306p-1f
|
||||
#define M_2_SQRTPI_F 0x1.20dd76p+0f
|
||||
#define M_SQRT2_F 0x1.6a09e6p+0f
|
||||
#define M_SQRT1_2_F 0x1.6a09e6p-1f
|
||||
|
||||
#define M_LOG210_F 0x1.a934f0p+1f
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#define HUGE_VAL __builtin_huge_val()
|
||||
|
||||
#define DBL_DIG 15
|
||||
#define DBL_MANT_DIG 53
|
||||
#define DBL_MAX_10_EXP +308
|
||||
#define DBL_MAX_EXP +1024
|
||||
#define DBL_MIN_10_EXP -307
|
||||
#define DBL_MIN_EXP -1021
|
||||
#define DBL_MAX 0x1.fffffffffffffp1023
|
||||
#define DBL_MIN 0x1.0p-1022
|
||||
#define DBL_EPSILON 0x1.0p-52
|
||||
|
||||
#define M_E 0x1.5bf0a8b145769p+1
|
||||
#define M_LOG2E 0x1.71547652b82fep+0
|
||||
#define M_LOG10E 0x1.bcb7b1526e50ep-2
|
||||
#define M_LN2 0x1.62e42fefa39efp-1
|
||||
#define M_LN10 0x1.26bb1bbb55516p+1
|
||||
#define M_PI 0x1.921fb54442d18p+1
|
||||
#define M_PI_2 0x1.921fb54442d18p+0
|
||||
#define M_PI_4 0x1.921fb54442d18p-1
|
||||
#define M_1_PI 0x1.45f306dc9c883p-2
|
||||
#define M_2_PI 0x1.45f306dc9c883p-1
|
||||
#define M_2_SQRTPI 0x1.20dd750429b6dp+0
|
||||
#define M_SQRT2 0x1.6a09e667f3bcdp+0
|
||||
#define M_SQRT1_2 0x1.6a09e667f3bcdp-1
|
||||
|
||||
#ifdef __CLC_INTERNAL
|
||||
#define M_LOG210 0x1.a934f0979a371p+1
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#if __OPENCL_VERSION__ >= 120
|
||||
|
||||
#define HALF_DIG 3
|
||||
#define HALF_MANT_DIG 11
|
||||
#define HALF_MAX_10_EXP +4
|
||||
#define HALF_MAX_EXP +16
|
||||
#define HALF_MIN_10_EXP -4
|
||||
#define HALF_MIN_EXP -13
|
||||
|
||||
#define HALF_RADIX 2
|
||||
#define HALF_MAX 0x1.ffcp15h
|
||||
#define HALF_MIN 0x1.0p-14h
|
||||
#define HALF_EPSILON 0x1.0p-10h
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -30,11 +30,15 @@
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--clc", action="store_true", help="Generate clc internal conversions"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--clspv", action="store_true", help="Generate the clspv variant of the code"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
clc = args.clc
|
||||
clspv = args.clspv
|
||||
|
||||
types = [
|
||||
@@ -158,8 +162,32 @@ def conditional_guard(src, dst):
|
||||
return False
|
||||
|
||||
|
||||
nl = "\n"
|
||||
includes = []
|
||||
if not clc:
|
||||
includes = ["<clc/clc.h>"]
|
||||
else:
|
||||
includes = sorted(
|
||||
[
|
||||
"<clc/internal/clc.h>",
|
||||
"<clc/integer/definitions.h>",
|
||||
"<clc/float/definitions.h>",
|
||||
"<clc/integer/clc_abs.h>",
|
||||
"<clc/common/clc_sign.h>",
|
||||
"<clc/shared/clc_clamp.h>",
|
||||
"<clc/shared/clc_min.h>",
|
||||
"<clc/shared/clc_max.h>",
|
||||
"<clc/math/clc_fabs.h>",
|
||||
"<clc/math/clc_rint.h>",
|
||||
"<clc/math/clc_ceil.h>",
|
||||
"<clc/math/clc_floor.h>",
|
||||
"<clc/math/clc_nextafter.h>",
|
||||
"<clc/relational/clc_select.h>",
|
||||
]
|
||||
)
|
||||
|
||||
print(
|
||||
"""/* !!!! AUTOGENERATED FILE generated by convert_type.py !!!!!
|
||||
f"""/* !!!! AUTOGENERATED FILE generated by convert_type.py !!!!!
|
||||
|
||||
DON'T CHANGE THIS FILE. MAKE YOUR CHANGES TO convert_type.py AND RUN:
|
||||
$ ./generate-conversion-type-cl.sh
|
||||
@@ -188,7 +216,8 @@ print(
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <clc/clc.h>
|
||||
{nl.join(['#include ' + f for f in includes])}
|
||||
#include <clc/clc_convert.h>
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
@@ -210,6 +239,7 @@ print(
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
#
|
||||
# Default Conversions
|
||||
#
|
||||
@@ -236,6 +266,13 @@ print(
|
||||
# even for integer-to-integer conversions. When such a conversion
|
||||
# is used, the rounding mode is ignored.
|
||||
#
|
||||
def print_passthru_conversion(src_ty, dst_ty, fn_name):
|
||||
print(
|
||||
f"""_CLC_DEF _CLC_OVERLOAD {dst_ty} {fn_name}({src_ty} x) {{
|
||||
return __clc_{fn_name}(x);
|
||||
}}
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def generate_default_conversion(src, dst, mode):
|
||||
@@ -243,26 +280,35 @@ def generate_default_conversion(src, dst, mode):
|
||||
|
||||
for size in vector_sizes:
|
||||
if not size:
|
||||
print(
|
||||
f"""_CLC_DEF _CLC_OVERLOAD {dst} convert_{dst}{mode}({src} x) {{
|
||||
if clc:
|
||||
print(
|
||||
f"""_CLC_DEF _CLC_OVERLOAD {dst} __clc_convert_{dst}{mode}({src} x) {{
|
||||
return ({dst})x;
|
||||
}}
|
||||
"""
|
||||
)
|
||||
)
|
||||
else:
|
||||
print_passthru_conversion(src, dst, f"convert_{dst}{mode}")
|
||||
else:
|
||||
print(
|
||||
f"""_CLC_DEF _CLC_OVERLOAD {dst}{size} convert_{dst}{size}{mode}({src}{size} x) {{
|
||||
if clc:
|
||||
print(
|
||||
f"""_CLC_DEF _CLC_OVERLOAD {dst}{size} __clc_convert_{dst}{size}{mode}({src}{size} x) {{
|
||||
return __builtin_convertvector(x, {dst}{size});
|
||||
}}
|
||||
"""
|
||||
)
|
||||
)
|
||||
else:
|
||||
print_passthru_conversion(
|
||||
f"{src}{size}", f"{dst}{size}", f"convert_{dst}{size}{mode}"
|
||||
)
|
||||
|
||||
if close_conditional:
|
||||
print("#endif")
|
||||
|
||||
|
||||
# Do not generate default conversion for clspv as they are handled natively
|
||||
if not clspv:
|
||||
# Do not generate user-facing default conversions for clspv as they are handled
|
||||
# natively
|
||||
if clc or not clspv:
|
||||
for src in types:
|
||||
for dst in types:
|
||||
generate_default_conversion(src, dst, "")
|
||||
@@ -270,15 +316,16 @@ if not clspv:
|
||||
for src in int_types:
|
||||
for dst in int_types:
|
||||
for mode in rounding_modes:
|
||||
# Do not generate "_rte" conversion for clspv as they are handled
|
||||
# natively
|
||||
if clspv and mode == "_rte":
|
||||
# Do not generate user-facing "_rte" conversions for clspv as they
|
||||
# are handled natively
|
||||
if clspv and not clc and mode == "_rte":
|
||||
continue
|
||||
generate_default_conversion(src, dst, mode)
|
||||
|
||||
#
|
||||
# Saturated Conversions To Integers
|
||||
#
|
||||
|
||||
|
||||
# These functions are dependent on the unsaturated conversion functions
|
||||
# generated above, and use clamp, max, min, and select to eliminate
|
||||
# branching and vectorize the conversions.
|
||||
@@ -286,35 +333,37 @@ for src in int_types:
|
||||
# Again, as above, we allow all rounding modes for integer-to-integer
|
||||
# conversions with saturation.
|
||||
#
|
||||
|
||||
|
||||
def generate_saturated_conversion(src, dst, size):
|
||||
# Header
|
||||
close_conditional = conditional_guard(src, dst)
|
||||
print(
|
||||
"""_CLC_DEF _CLC_OVERLOAD
|
||||
{DST}{N} convert_{DST}{N}_sat({SRC}{N} x)
|
||||
{{""".format(
|
||||
DST=dst, SRC=src, N=size
|
||||
)
|
||||
)
|
||||
|
||||
# FIXME: This is a work around for lack of select function with
|
||||
# signed third argument when the first two arguments are unsigned types.
|
||||
# We cast to the signed type for sign-extension, then do a bitcast to
|
||||
# the unsigned type.
|
||||
dstn = f"{dst}{size}"
|
||||
srcn = f"{src}{size}"
|
||||
|
||||
if not clc:
|
||||
print_passthru_conversion(f"{srcn}", f"{dstn}", f"convert_{dstn}_sat")
|
||||
if close_conditional:
|
||||
print("#endif")
|
||||
return
|
||||
|
||||
print(f"_CLC_DEF _CLC_OVERLOAD {dstn} __clc_convert_{dstn}_sat({srcn} x) {{")
|
||||
|
||||
# FIXME: This is a workaround for the lack of a select function with signed
|
||||
# third argument when the first two arguments are unsigned types. We cast
|
||||
# to the signed type for sign-extension, then do a bitcast to the unsigned
|
||||
# type.
|
||||
if dst in unsigned_types:
|
||||
bool_prefix = "as_{DST}{N}(convert_{BOOL}{N}".format(
|
||||
DST=dst, BOOL=bool_type[dst], N=size
|
||||
)
|
||||
bool_prefix = f"__clc_as_{dstn}(__clc_convert_{bool_type[dst]}{size}"
|
||||
bool_suffix = ")"
|
||||
else:
|
||||
bool_prefix = "convert_{BOOL}{N}".format(BOOL=bool_type[dst], N=size)
|
||||
bool_prefix = f"__clc_convert_{bool_type[dst]}{size}"
|
||||
bool_suffix = ""
|
||||
|
||||
dst_max = limit_max[dst]
|
||||
dst_min = limit_min[dst]
|
||||
|
||||
# Body
|
||||
if src == dst:
|
||||
|
||||
# Conversion between same types
|
||||
print(" return x;")
|
||||
|
||||
@@ -323,69 +372,40 @@ def generate_saturated_conversion(src, dst, size):
|
||||
if clspv:
|
||||
# Conversion from float to int
|
||||
print(
|
||||
""" {DST}{N} y = convert_{DST}{N}(x);
|
||||
y = select(y, ({DST}{N}){DST_MIN}, {BP}(x <= ({SRC}{N}){DST_MIN}){BS});
|
||||
y = select(y, ({DST}{N}){DST_MAX}, {BP}(x >= ({SRC}{N}){DST_MAX}){BS});
|
||||
return y;""".format(
|
||||
SRC=src,
|
||||
DST=dst,
|
||||
N=size,
|
||||
DST_MIN=limit_min[dst],
|
||||
DST_MAX=limit_max[dst],
|
||||
BP=bool_prefix,
|
||||
BS=bool_suffix,
|
||||
)
|
||||
f""" {dstn} y = __clc_convert_{dstn}(x);
|
||||
y = __clc_select(y, ({dstn}){dst_min}, {bool_prefix}(x <= ({srcn}){dst_min}){bool_suffix});
|
||||
y = __clc_select(y, ({dstn}){dst_max}, {bool_prefix}(x >= ({srcn}){dst_max}){bool_suffix});
|
||||
return y;"""
|
||||
)
|
||||
else:
|
||||
# Conversion from float to int
|
||||
print(
|
||||
""" {DST}{N} y = convert_{DST}{N}(x);
|
||||
y = select(y, ({DST}{N}){DST_MIN}, {BP}(x < ({SRC}{N}){DST_MIN}){BS});
|
||||
y = select(y, ({DST}{N}){DST_MAX}, {BP}(x > ({SRC}{N}){DST_MAX}){BS});
|
||||
return y;""".format(
|
||||
SRC=src,
|
||||
DST=dst,
|
||||
N=size,
|
||||
DST_MIN=limit_min[dst],
|
||||
DST_MAX=limit_max[dst],
|
||||
BP=bool_prefix,
|
||||
BS=bool_suffix,
|
||||
)
|
||||
f""" {dstn} y = __clc_convert_{dstn}(x);
|
||||
y = __clc_select(y, ({dstn}){dst_min}, {bool_prefix}(x < ({srcn}){dst_min}){bool_suffix});
|
||||
y = __clc_select(y, ({dstn}){dst_max}, {bool_prefix}(x > ({srcn}){dst_max}){bool_suffix});
|
||||
return y;"""
|
||||
)
|
||||
|
||||
else:
|
||||
|
||||
# Integer to integer conversion with sizeof(src) == sizeof(dst)
|
||||
if sizeof_type[src] == sizeof_type[dst]:
|
||||
if src in unsigned_types:
|
||||
print(
|
||||
" x = min(x, ({SRC}){DST_MAX});".format(
|
||||
SRC=src, DST_MAX=limit_max[dst]
|
||||
)
|
||||
)
|
||||
print(f" x = __clc_min(x, ({src}){dst_max});")
|
||||
else:
|
||||
print(" x = max(x, ({SRC})0);".format(SRC=src))
|
||||
print(f" x = __clc_max(x, ({src})0);")
|
||||
|
||||
# Integer to integer conversion where sizeof(src) > sizeof(dst)
|
||||
elif sizeof_type[src] > sizeof_type[dst]:
|
||||
if src in unsigned_types:
|
||||
print(
|
||||
" x = min(x, ({SRC}){DST_MAX});".format(
|
||||
SRC=src, DST_MAX=limit_max[dst]
|
||||
)
|
||||
)
|
||||
print(f" x = __clc_min(x, ({src}){dst_max});")
|
||||
else:
|
||||
print(
|
||||
" x = clamp(x, ({SRC}){DST_MIN}, ({SRC}){DST_MAX});".format(
|
||||
SRC=src, DST_MIN=limit_min[dst], DST_MAX=limit_max[dst]
|
||||
)
|
||||
)
|
||||
print(f" x = __clc_clamp(x, ({src}){dst_min}, ({src}){dst_max});")
|
||||
|
||||
# Integer to integer conversion where sizeof(src) < sizeof(dst)
|
||||
elif src not in unsigned_types and dst in unsigned_types:
|
||||
print(" x = max(x, ({SRC})0);".format(SRC=src))
|
||||
print(f" x = __clc_max(x, ({src})0);")
|
||||
|
||||
print(" return convert_{DST}{N}(x);".format(DST=dst, N=size))
|
||||
print(f" return __clc_convert_{dstn}(x);")
|
||||
|
||||
# Footer
|
||||
print("}")
|
||||
@@ -403,17 +423,19 @@ def generate_saturated_conversion_with_rounding(src, dst, size, mode):
|
||||
# Header
|
||||
close_conditional = conditional_guard(src, dst)
|
||||
|
||||
# Body
|
||||
print(
|
||||
"""_CLC_DEF _CLC_OVERLOAD
|
||||
{DST}{N} convert_{DST}{N}_sat{M}({SRC}{N} x)
|
||||
{{
|
||||
return convert_{DST}{N}_sat(x);
|
||||
dstn = f"{dst}{size}"
|
||||
srcn = f"{src}{size}"
|
||||
|
||||
if not clc:
|
||||
print_passthru_conversion(f"{srcn}", f"{dstn}", f"convert_{dstn}_sat{mode}")
|
||||
else:
|
||||
# Body
|
||||
print(
|
||||
f"""_CLC_DEF _CLC_OVERLOAD {dstn} __clc_convert_{dstn}_sat{mode}({srcn} x) {{
|
||||
return __clc_convert_{dstn}_sat(x);
|
||||
}}
|
||||
""".format(
|
||||
DST=dst, SRC=src, N=size, M=mode
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
# Footer
|
||||
if close_conditional:
|
||||
@@ -426,6 +448,7 @@ for src in int_types:
|
||||
for mode in rounding_modes:
|
||||
generate_saturated_conversion_with_rounding(src, dst, size, mode)
|
||||
|
||||
|
||||
#
|
||||
# Conversions To/From Floating-Point With Rounding
|
||||
#
|
||||
@@ -439,134 +462,90 @@ for src in int_types:
|
||||
#
|
||||
# Only conversions to integers can have saturation.
|
||||
#
|
||||
|
||||
|
||||
def generate_float_conversion(src, dst, size, mode, sat):
|
||||
# Header
|
||||
close_conditional = conditional_guard(src, dst)
|
||||
print(
|
||||
"""_CLC_DEF _CLC_OVERLOAD
|
||||
{DST}{N} convert_{DST}{N}{S}{M}({SRC}{N} x)
|
||||
{{""".format(
|
||||
SRC=src, DST=dst, N=size, M=mode, S=sat
|
||||
)
|
||||
)
|
||||
|
||||
dstn = f"{dst}{size}"
|
||||
srcn = f"{src}{size}"
|
||||
booln = f"{bool_type[dst]}{size}"
|
||||
src_max = limit_max[src] if src in limit_max else ""
|
||||
dst_min = limit_min[dst] if dst in limit_min else ""
|
||||
|
||||
if not clc:
|
||||
print_passthru_conversion(f"{srcn}", f"{dstn}", f"convert_{dstn}{sat}{mode}")
|
||||
# Footer
|
||||
if close_conditional:
|
||||
print("#endif")
|
||||
return
|
||||
|
||||
print(f"_CLC_DEF _CLC_OVERLOAD {dstn} __clc_convert_{dstn}{sat}{mode}({srcn} x) {{")
|
||||
|
||||
# Perform conversion
|
||||
if dst in int_types:
|
||||
if mode == "_rte":
|
||||
print(" x = rint(x);")
|
||||
print(" x = __clc_rint(x);")
|
||||
elif mode == "_rtp":
|
||||
print(" x = ceil(x);")
|
||||
print(" x = __clc_ceil(x);")
|
||||
elif mode == "_rtn":
|
||||
print(" x = floor(x);")
|
||||
print(" return convert_{DST}{N}{S}(x);".format(DST=dst, N=size, S=sat))
|
||||
print(" x = __clc_floor(x);")
|
||||
print(f" return __clc_convert_{dstn}{sat}(x);")
|
||||
elif mode == "_rte":
|
||||
print(" return convert_{DST}{N}(x);".format(DST=dst, N=size))
|
||||
print(f" return __clc_convert_{dstn}(x);")
|
||||
else:
|
||||
print(" {DST}{N} r = convert_{DST}{N}(x);".format(DST=dst, N=size))
|
||||
print(f" {dstn} r = __clc_convert_{dstn}(x);")
|
||||
if clspv:
|
||||
print(" {SRC}{N} y = convert_{SRC}{N}_sat(r);".format(SRC=src, N=size))
|
||||
print(f" {srcn} y = __clc_convert_{srcn}_sat(r);")
|
||||
else:
|
||||
print(" {SRC}{N} y = convert_{SRC}{N}(r);".format(SRC=src, N=size))
|
||||
print(f" {srcn} y = __clc_convert_{srcn}(r);")
|
||||
if mode == "_rtz":
|
||||
if src in int_types:
|
||||
print(
|
||||
" {USRC}{N} abs_x = abs(x);".format(
|
||||
USRC=unsigned_type[src], N=size
|
||||
)
|
||||
)
|
||||
print(
|
||||
" {USRC}{N} abs_y = abs(y);".format(
|
||||
USRC=unsigned_type[src], N=size
|
||||
)
|
||||
)
|
||||
usrcn = f"{unsigned_type[src]}{size}"
|
||||
print(f" {usrcn} abs_x = __clc_abs(x);")
|
||||
print(f" {usrcn} abs_y = __clc_abs(y);")
|
||||
else:
|
||||
print(" {SRC}{N} abs_x = fabs(x);".format(SRC=src, N=size))
|
||||
print(" {SRC}{N} abs_y = fabs(y);".format(SRC=src, N=size))
|
||||
if clspv:
|
||||
print(
|
||||
" {BOOL}{N} c = convert_{BOOL}{N}(abs_y > abs_x);".format(
|
||||
BOOL=bool_type[dst], N=size
|
||||
)
|
||||
)
|
||||
if sizeof_type[src] >= 4 and src in int_types:
|
||||
print(
|
||||
" c = c || convert_{BOOL}{N}(({SRC}{N}){SRC_MAX} == x);".format(
|
||||
BOOL=bool_type[dst], N=size, SRC=src, SRC_MAX=limit_max[src]
|
||||
)
|
||||
)
|
||||
print(
|
||||
" {DST}{N} sel = select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), c);".format(
|
||||
DST=dst, N=size, BOOL=bool_type[dst], SRC=src
|
||||
)
|
||||
)
|
||||
else:
|
||||
print(
|
||||
" {DST}{N} sel = select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));".format(
|
||||
DST=dst, N=size, BOOL=bool_type[dst]
|
||||
)
|
||||
)
|
||||
print(f" {srcn} abs_x = __clc_fabs(x);")
|
||||
print(f" {srcn} abs_y = __clc_fabs(y);")
|
||||
print(f" {booln} c = __clc_convert_{booln}(abs_y > abs_x);")
|
||||
if clspv and sizeof_type[src] >= 4 and src in int_types:
|
||||
print(f" c = c || __clc_convert_{booln}(({srcn}){src_max} == x);")
|
||||
print(
|
||||
f" {dstn} sel = __clc_select(r, __clc_nextafter(r, __clc_sign(r) * ({dstn})-INFINITY), c);"
|
||||
)
|
||||
if dst == "half" and src in int_types and sizeof_type[src] >= 2:
|
||||
dst_max = limit_max[dst]
|
||||
# short is 16 bits signed, so the maximum value rounded to zero is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff == 32767)
|
||||
# short is 16 bits signed, so the maximum value rounded to zero
|
||||
# is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff == 32767)
|
||||
if src == "short":
|
||||
dst_max = "0x1.ffcp+14"
|
||||
print(
|
||||
" return clamp(sel, ({DST}{N}){DST_MIN}, ({DST}{N}){DST_MAX});".format(
|
||||
DST=dst, N=size, DST_MIN=limit_min[dst], DST_MAX=dst_max
|
||||
)
|
||||
f" return __clc_clamp(sel, ({dstn}){dst_min}, ({dstn}){dst_max});"
|
||||
)
|
||||
else:
|
||||
print(" return sel;")
|
||||
if mode == "_rtp":
|
||||
print(
|
||||
" {DST}{N} sel = select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));".format(
|
||||
DST=dst, N=size, BOOL=bool_type[dst]
|
||||
)
|
||||
f" {dstn} sel = __clc_select(r, __clc_nextafter(r, ({dstn})INFINITY), __clc_convert_{booln}(y < x));"
|
||||
)
|
||||
if dst == "half" and src in int_types and sizeof_type[src] >= 2:
|
||||
print(
|
||||
" return max(sel, ({DST}{N}){DST_MIN});".format(
|
||||
DST=dst, N=size, DST_MIN=limit_min[dst]
|
||||
)
|
||||
)
|
||||
print(f" return __clc_max(sel, ({dstn}){dst_min});")
|
||||
else:
|
||||
print(" return sel;")
|
||||
if mode == "_rtn":
|
||||
if clspv:
|
||||
print(
|
||||
" {BOOL}{N} c = convert_{BOOL}{N}(y > x);".format(
|
||||
BOOL=bool_type[dst], N=size
|
||||
)
|
||||
)
|
||||
if sizeof_type[src] >= 4 and src in int_types:
|
||||
print(
|
||||
" c = c || convert_{BOOL}{N}(({SRC}{N}){SRC_MAX} == x);".format(
|
||||
BOOL=bool_type[dst], N=size, SRC=src, SRC_MAX=limit_max[src]
|
||||
)
|
||||
)
|
||||
print(
|
||||
" {DST}{N} sel = select(r, nextafter(r, ({DST}{N})-INFINITY), c);".format(
|
||||
DST=dst, N=size, BOOL=bool_type[dst], SRC=src
|
||||
)
|
||||
)
|
||||
else:
|
||||
print(
|
||||
" {DST}{N} sel = select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));".format(
|
||||
DST=dst, N=size, BOOL=bool_type[dst]
|
||||
)
|
||||
)
|
||||
print(f" {booln} c = __clc_convert_{booln}(y > x);")
|
||||
if clspv and sizeof_type[src] >= 4 and src in int_types:
|
||||
print(f" c = c || __clc_convert_{booln}(({srcn}){src_max} == x);")
|
||||
print(
|
||||
f" {dstn} sel = __clc_select(r, __clc_nextafter(r, ({dstn})-INFINITY), c);"
|
||||
)
|
||||
if dst == "half" and src in int_types and sizeof_type[src] >= 2:
|
||||
dst_max = limit_max[dst]
|
||||
# short is 16 bits signed, so the maximum value rounded to negative infinity is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff == 32767)
|
||||
# short is 16 bits signed, so the maximum value rounded to
|
||||
# negative infinity is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff
|
||||
# == 32767)
|
||||
if src == "short":
|
||||
dst_max = "0x1.ffcp+14"
|
||||
print(
|
||||
" return min(sel, ({DST}{N}){DST_MAX});".format(
|
||||
DST=dst, N=size, DST_MAX=dst_max
|
||||
)
|
||||
)
|
||||
print(f" return __clc_min(sel, ({dstn}){dst_max});")
|
||||
else:
|
||||
print(" return sel;")
|
||||
|
||||
@@ -588,8 +567,8 @@ for src in types:
|
||||
for dst in float_types:
|
||||
for size in vector_sizes:
|
||||
for mode in rounding_modes:
|
||||
# Do not generate "_rte" conversion for clspv as they are
|
||||
# handled natively
|
||||
if clspv and mode == "_rte":
|
||||
# Do not generate user-facing "_rte" conversions for clspv as
|
||||
# they are handled natively
|
||||
if clspv and not clc and mode == "_rte":
|
||||
continue
|
||||
generate_float_conversion(src, dst, size, mode, "")
|
||||
|
||||
Reference in New Issue
Block a user