This commit moves over the OpenCL clz, hadd, mad24, mad_hi, mul24, mul_hi, popcount, rhadd, and upsample builtins to the CLC library. This commit also optimizes the vector forms of the mul_hi and upsample builtins to consistently remain in vector types, instead of recursively splitting vectors down to the scalar form. The OpenCL mad_hi builtin wasn't previously publicly available from the CLC libraries, as it was hash-defined to mul_hi in the header files. That issue has been fixed, and mad_hi is now exposed. The custom AMD implementation/workaround for popcount has been removed as it was only required for clang < 7. There are still two integer functions which haven't been moved over. The OpenCL mad_sat builtin uses many of the other integer builtins, and would benefit from optimization for vector types. That can take place in a follow-up commit. The rotate builtin could similarly use some more dedicated focus, potentially using clang builtins.
37 lines
2.2 KiB
Common Lisp
37 lines
2.2 KiB
Common Lisp
#include <clc/clc.h>
|
|
#include <clc/integer/clc_upsample.h>
|
|
|
|
/*
 * Generate the public upsample() overloads for one element-type triple:
 * the scalar form plus the 2-, 3-, 4-, 8- and 16-element vector forms.
 *
 *   WIDE_T  element type of the returned value
 *   HI_T    element type of the `hi` argument
 *   LO_T    element type of the `lo` argument
 *
 * The vector overloads are formed by pasting the width suffix onto each type
 * name (e.g. WIDE_T##4). Every overload passes `hi` and `lo` through
 * unchanged to __clc_upsample and returns its result; vectors are handed over
 * whole rather than being split per element at this level.
 *
 * NOTE(review): __clc_upsample is presumably declared by the included
 * <clc/integer/clc_upsample.h> -- confirm against that header.
 */
#define __CLC_UPSAMPLE_IMPL(WIDE_T, HI_T, LO_T) \
  _CLC_OVERLOAD _CLC_DEF WIDE_T upsample(HI_T hi, LO_T lo) { \
    return __clc_upsample(hi, lo); \
  } \
  _CLC_OVERLOAD _CLC_DEF WIDE_T##2 upsample(HI_T##2 hi, LO_T##2 lo) { \
    return __clc_upsample(hi, lo); \
  } \
  _CLC_OVERLOAD _CLC_DEF WIDE_T##3 upsample(HI_T##3 hi, LO_T##3 lo) { \
    return __clc_upsample(hi, lo); \
  } \
  _CLC_OVERLOAD _CLC_DEF WIDE_T##4 upsample(HI_T##4 hi, LO_T##4 lo) { \
    return __clc_upsample(hi, lo); \
  } \
  _CLC_OVERLOAD _CLC_DEF WIDE_T##8 upsample(HI_T##8 hi, LO_T##8 lo) { \
    return __clc_upsample(hi, lo); \
  } \
  _CLC_OVERLOAD _CLC_DEF WIDE_T##16 upsample(HI_T##16 hi, LO_T##16 lo) { \
    return __clc_upsample(hi, lo); \
  }
|
|
|
|
#define __CLC_UPSAMPLE_TYPES() \
|
|
__CLC_UPSAMPLE_IMPL(short, char, uchar) \
|
|
__CLC_UPSAMPLE_IMPL(ushort, uchar, uchar) \
|
|
__CLC_UPSAMPLE_IMPL(int, short, ushort) \
|
|
__CLC_UPSAMPLE_IMPL(uint, ushort, ushort) \
|
|
__CLC_UPSAMPLE_IMPL(long, int, uint) \
|
|
__CLC_UPSAMPLE_IMPL(ulong, uint, uint)
|
|
|
|
__CLC_UPSAMPLE_TYPES()
|
|
|
|
#undef __CLC_UPSAMPLE_TYPES
|
|
#undef __CLC_UPSAMPLE_IMPL
|