This commit moves the OpenCL clz, hadd, mad24, mad_hi, mul24, mul_hi, popcount, rhadd, and upsample builtins over to the CLC library. It also optimizes the vector forms of the mul_hi and upsample builtins so that they consistently remain in vector types, instead of recursively splitting vectors down to the scalar form. The OpenCL mad_hi builtin wasn't previously publicly available from the CLC libraries, as it was #define'd to mul_hi in the header files. That issue has been fixed, and mad_hi is now exposed. The custom AMD implementation/workaround for popcount has been removed, as it was only required for clang < 7. There are still two integer functions which haven't been moved over: mad_sat and rotate. The OpenCL mad_sat builtin uses many of the other integer builtins, and would benefit from optimization for vector types; that can take place in a follow-up commit. The rotate builtin could similarly use some more dedicated focus, potentially using clang builtins.
36 lines
1.9 KiB
Common Lisp
36 lines
1.9 KiB
Common Lisp
#include <clc/internal/clc.h>
|
|
|
|
// Converts the vector VALUE to the vector type DST_TY by forwarding both
// arguments to the compiler's __builtin_convertvector.
// TODO: Switch to __clc_convert_<type> once that is available.
#define __CLC_CONVERT_TY(VALUE, DST_TY) __builtin_convertvector(VALUE, DST_TY)
|
|
|
|
// Emits one vector overload of __clc_upsample.
//   BGENTYPE - result vector type
//   GENTYPE  - type of the 'hi' operand
//   UGENTYPE - type of the 'lo' operand
//   GENSIZE  - number of bits 'hi' is shifted left by
// Both operands are converted to BGENTYPE up front, so the shift and the OR
// are applied to whole vectors.
#define __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE)          \
  _CLC_OVERLOAD _CLC_DEF BGENTYPE __clc_upsample(GENTYPE hi, UGENTYPE lo) {    \
    BGENTYPE wide_lo = __CLC_CONVERT_TY(lo, BGENTYPE);                         \
    BGENTYPE wide_hi = __CLC_CONVERT_TY(hi, BGENTYPE);                         \
    BGENTYPE shift = (BGENTYPE)GENSIZE;                                        \
    return wide_lo | (wide_hi << shift);                                       \
  }
|
|
|
|
// Emits every __clc_upsample overload for one element type: the scalar
// definition, followed by the 2-, 3-, 4-, 8- and 16-element vector overloads
// produced by __CLC_UPSAMPLE_VEC_IMPL.
// The scalar form casts 'hi' to BGENTYPE, shifts it left by GENSIZE bits and
// ORs 'lo' into the result.
#define __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE)              \
  _CLC_OVERLOAD _CLC_DEF BGENTYPE __clc_upsample(GENTYPE hi, UGENTYPE lo) {    \
    BGENTYPE wide_hi = (BGENTYPE)hi;                                           \
    return (wide_hi << GENSIZE) | lo;                                          \
  }                                                                            \
  __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2, GENSIZE)       \
  __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3, GENSIZE)       \
  __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4, GENSIZE)       \
  __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8, GENSIZE)       \
  __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16, GENSIZE)
|
|
|
|
// Instantiates __CLC_UPSAMPLE_IMPL once per supported type combination.
// Arguments per row: result type, 'hi' type, 'lo' type, shift in bits.
// The signed-result rows come first, then the unsigned-result rows; the 'lo'
// operand is unsigned in every row.
#define __CLC_UPSAMPLE_TYPES()                                                 \
  __CLC_UPSAMPLE_IMPL(short, char, uchar, 8)                                   \
  __CLC_UPSAMPLE_IMPL(int, short, ushort, 16)                                  \
  __CLC_UPSAMPLE_IMPL(long, int, uint, 32)                                     \
  __CLC_UPSAMPLE_IMPL(ushort, uchar, uchar, 8)                                 \
  __CLC_UPSAMPLE_IMPL(uint, ushort, ushort, 16)                                \
  __CLC_UPSAMPLE_IMPL(ulong, uint, uint, 32)
|
|
|
|
__CLC_UPSAMPLE_TYPES()
|
|
|
|
#undef __CLC_UPSAMPLE_TYPES
|
|
#undef __CLC_UPSAMPLE_IMPL
|
|
#undef __CLC_CONVERT_TY
|