[NFC][libclc] Refactor _CLC_*_VECTORIZE macros to functions in .inc files (#145678)

With this PR, if we have a customized implementation for the scalar case or for vector length 2, we don't need to write new macros, e.g. https://github.com/intel/llvm/blob/fb18321705f6/libclc/clc/include/clc/clcmacro.h#L15. Also undef __HALF_ONLY, __FLOAT_ONLY and __DOUBLE_ONLY at the end of clc/include/clc/math/gentype.inc. llvm-diff shows no change to nvptx64--nvidiacl.bc and amdgcn--amdhsa.bc.
2025-06-30 17:19:19 +08:00
parent 1dfc3e8461
commit 338dee0742
90 changed files with 590 additions and 276 deletions
--- a/libclc/clc/include/clc/clcmacro.h
+++ b/libclc/clc/include/clc/clcmacro.h
@@ -12,111 +12,6 @@
 #include <clc/internal/clc.h>
 #include <clc/utils.h>
 // Generates the 2-, 3-, 4-, 8- and 16-wide overloads of a one-argument
 // FUNCTION. Every overload calls FUNCTION once per component of x
 // (x.s0 .. x.sf) and packs the results into a RET_TYPE##N vector literal.
 //   DECLSPEC   placed verbatim in front of each generated definition
 //   RET_TYPE   element type of the result; pasted with the width via ##
 //   FUNCTION   name being overloaded and also the per-component callee
 //   ARG1_TYPE  element type of the argument; pasted with the width via ##
 // The per-component calls need a FUNCTION overload that accepts a single
 // element to be visible where the macro is expanded.
 #define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE)          \
  DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) {                              \
    return (RET_TYPE##2)(FUNCTION(x.s0), FUNCTION(x.s1));                      \
  }                                                                            \
                                                                               \
  DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) {                              \
    return (RET_TYPE##3)(FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2));      \
  }                                                                            \
                                                                               \
  DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) {                              \
    return (RET_TYPE##4)(FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2),       \
                         FUNCTION(x.s3));                                      \
  }                                                                            \
                                                                               \
  DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) {                              \
    return (RET_TYPE##8)(FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2),       \
                         FUNCTION(x.s3), FUNCTION(x.s4), FUNCTION(x.s5),       \
                         FUNCTION(x.s6), FUNCTION(x.s7));                      \
  }                                                                            \
                                                                               \
  DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) {                            \
    return (RET_TYPE##16)(                                                     \
        FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3),        \
        FUNCTION(x.s4), FUNCTION(x.s5), FUNCTION(x.s6), FUNCTION(x.s7),        \
        FUNCTION(x.s8), FUNCTION(x.s9), FUNCTION(x.sa), FUNCTION(x.sb),        \
        FUNCTION(x.sc), FUNCTION(x.sd), FUNCTION(x.se), FUNCTION(x.sf));       \
  }
 // Generates the 2-, 3-, 4-, 8- and 16-wide overloads of a two-argument
 // FUNCTION. Component i of the result is FUNCTION(x.s<i>, y.s<i>); the
 // results are packed into a RET_TYPE##N vector literal.
 //   DECLSPEC              placed verbatim in front of each definition
 //   RET_TYPE              element type of the result
 //   FUNCTION              name being overloaded and the per-component callee
 //   ARG1_TYPE, ARG2_TYPE  element types of x and y
 // All three type names are pasted with the width via ##. A FUNCTION overload
 // taking single elements has to be visible where the macro is expanded.
 #define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,         \
                               ARG2_TYPE)                                       \
  DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) {              \
    return (RET_TYPE##2)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1));          \
  }                                                                            \
                                                                               \
  DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) {              \
    return (RET_TYPE##3)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1),           \
                         FUNCTION(x.s2, y.s2));                                \
  }                                                                            \
                                                                               \
  DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y) {              \
    return (RET_TYPE##4)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1),           \
                         FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3));          \
  }                                                                            \
                                                                               \
  DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y) {              \
    return (RET_TYPE##8)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1),           \
                         FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3),           \
                         FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5),           \
                         FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7));          \
  }                                                                            \
                                                                               \
  DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y) {           \
    return (RET_TYPE##16)(                                                     \
        FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), FUNCTION(x.s2, y.s2),      \
        FUNCTION(x.s3, y.s3), FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5),      \
        FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7), FUNCTION(x.s8, y.s8),      \
        FUNCTION(x.s9, y.s9), FUNCTION(x.sa, y.sa), FUNCTION(x.sb, y.sb),      \
        FUNCTION(x.sc, y.sc), FUNCTION(x.sd, y.sd), FUNCTION(x.se, y.se),      \
        FUNCTION(x.sf, y.sf));                                                 \
  }
 // Generates the 2-, 3-, 4-, 8- and 16-wide overloads of a three-argument
 // FUNCTION. Component i of the result is FUNCTION(x.s<i>, y.s<i>, z.s<i>);
 // the results are packed into a RET_TYPE##N vector literal.
 //   DECLSPEC                         placed verbatim before each definition
 //   RET_TYPE                         element type of the result
 //   FUNCTION                         name being overloaded and the callee
 //   ARG1_TYPE, ARG2_TYPE, ARG3_TYPE  element types of x, y and z
 // All four type names are pasted with the width via ##. A FUNCTION overload
 // taking single elements has to be visible where the macro is expanded.
 #define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,        \
                                ARG2_TYPE, ARG3_TYPE)                           \
  DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y,                \
                                ARG3_TYPE##2 z) {                              \
    return (RET_TYPE##2)(FUNCTION(x.s0, y.s0, z.s0),                           \
                         FUNCTION(x.s1, y.s1, z.s1));                          \
  }                                                                            \
                                                                               \
  DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y,                \
                                ARG3_TYPE##3 z) {                              \
    return (RET_TYPE##3)(FUNCTION(x.s0, y.s0, z.s0),                           \
                         FUNCTION(x.s1, y.s1, z.s1),                           \
                         FUNCTION(x.s2, y.s2, z.s2));                          \
  }                                                                            \
                                                                               \
  DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y,                \
                                ARG3_TYPE##4 z) {                              \
    return (RET_TYPE##4)(                                                      \
        FUNCTION(x.s0, y.s0, z.s0), FUNCTION(x.s1, y.s1, z.s1),                \
        FUNCTION(x.s2, y.s2, z.s2), FUNCTION(x.s3, y.s3, z.s3));               \
  }                                                                            \
                                                                               \
  DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y,                \
                                ARG3_TYPE##8 z) {                              \
    return (RET_TYPE##8)(                                                      \
        FUNCTION(x.s0, y.s0, z.s0), FUNCTION(x.s1, y.s1, z.s1),                \
        FUNCTION(x.s2, y.s2, z.s2), FUNCTION(x.s3, y.s3, z.s3),                \
        FUNCTION(x.s4, y.s4, z.s4), FUNCTION(x.s5, y.s5, z.s5),                \
        FUNCTION(x.s6, y.s6, z.s6), FUNCTION(x.s7, y.s7, z.s7));               \
  }                                                                            \
                                                                               \
  DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y,             \
                                 ARG3_TYPE##16 z) {                            \
    return (RET_TYPE##16)(                                                     \
        FUNCTION(x.s0, y.s0, z.s0), FUNCTION(x.s1, y.s1, z.s1),                \
        FUNCTION(x.s2, y.s2, z.s2), FUNCTION(x.s3, y.s3, z.s3),                \
        FUNCTION(x.s4, y.s4, z.s4), FUNCTION(x.s5, y.s5, z.s5),                \
        FUNCTION(x.s6, y.s6, z.s6), FUNCTION(x.s7, y.s7, z.s7),                \
        FUNCTION(x.s8, y.s8, z.s8), FUNCTION(x.s9, y.s9, z.s9),                \
        FUNCTION(x.sa, y.sa, z.sa), FUNCTION(x.sb, y.sb, z.sb),                \
        FUNCTION(x.sc, y.sc, z.sc), FUNCTION(x.sd, y.sd, z.sd),                \
        FUNCTION(x.se, y.se, z.se), FUNCTION(x.sf, y.sf, z.sf));               \
  }
 #define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,         \
                              ADDR_SPACE, ARG2_TYPE)                           \
  DECLSPEC __CLC_XCONCAT(RET_TYPE, 2)                                          \
@@ -171,12 +66,4 @@
        FUNCTION(x.sf, ptr + 15));                                             \
  }
 // Defines the scalar overload FUNCTION(ARG1_TYPE x, ARG2_TYPE y), which
 // simply returns BUILTIN(x, y), and then expands _CLC_BINARY_VECTORIZE with
 // the same RET_TYPE/FUNCTION/ARG types to add the vector overloads on top of
 // it. Both the scalar and the vector definitions use _CLC_DEF _CLC_OVERLOAD.
 #define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE,     \
                                    ARG2_TYPE)                                  \
  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) {         \
    return BUILTIN(x, y);                                                      \
  }                                                                            \
  _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE, \
                        ARG2_TYPE)
 #endif // __CLC_CLCMACRO_H__
--- a/libclc/clc/include/clc/geometric/clc_fast_distance.h
+++ b/libclc/clc/include/clc/geometric/clc_fast_distance.h
@@ -15,7 +15,6 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
 #endif // __CLC_GEOMETRIC_CLC_FAST_DISTANCE_H__
--- a/libclc/clc/include/clc/geometric/clc_fast_length.h
+++ b/libclc/clc/include/clc/geometric/clc_fast_length.h
@@ -15,7 +15,6 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
 #endif // __CLC_GEOMETRIC_CLC_FAST_LENGTH_H__
--- a/libclc/clc/include/clc/geometric/clc_fast_normalize.h
+++ b/libclc/clc/include/clc/geometric/clc_fast_normalize.h
@@ -17,6 +17,5 @@
 #undef __CLC_FUNCTION
 #undef __CLC_GEOMETRIC_RET_GENTYPE
 #undef __FLOAT_ONLY
 #endif // __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__
--- a/libclc/clc/include/clc/math/clc_exp_helper.h
+++ b/libclc/clc/include/clc/math/clc_exp_helper.h
@@ -14,6 +14,4 @@
 #include <clc/math/gentype.inc>
 #undef __DOUBLE_ONLY
 #endif // __CLC_MATH_CLC_EXP_HELPER
--- a/libclc/clc/include/clc/math/clc_half_cos.h
+++ b/libclc/clc/include/clc/math/clc_half_cos.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_HALF_COS_H__
--- a/libclc/clc/include/clc/math/clc_half_divide.h
+++ b/libclc/clc/include/clc/math/clc_half_divide.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_HALF_DIVIDE_H__
--- a/libclc/clc/include/clc/math/clc_half_exp.h
+++ b/libclc/clc/include/clc/math/clc_half_exp.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_HALF_EXP_H__
--- a/libclc/clc/include/clc/math/clc_half_exp10.h
+++ b/libclc/clc/include/clc/math/clc_half_exp10.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_HALF_EXP10_H__
--- a/libclc/clc/include/clc/math/clc_half_exp2.h
+++ b/libclc/clc/include/clc/math/clc_half_exp2.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_HALF_EXP2_H__
--- a/libclc/clc/include/clc/math/clc_half_log.h
+++ b/libclc/clc/include/clc/math/clc_half_log.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_HALF_LOG_H__
--- a/libclc/clc/include/clc/math/clc_half_log10.h
+++ b/libclc/clc/include/clc/math/clc_half_log10.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_HALF_LOG10_H__
--- a/libclc/clc/include/clc/math/clc_half_log2.h
+++ b/libclc/clc/include/clc/math/clc_half_log2.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_HALF_LOG2_H__
--- a/libclc/clc/include/clc/math/clc_half_powr.h
+++ b/libclc/clc/include/clc/math/clc_half_powr.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_HALF_POWR_H__
--- a/libclc/clc/include/clc/math/clc_half_recip.h
+++ b/libclc/clc/include/clc/math/clc_half_recip.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_HALF_RECIP_H__
--- a/libclc/clc/include/clc/math/clc_half_rsqrt.h
+++ b/libclc/clc/include/clc/math/clc_half_rsqrt.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_HALF_RSQRT_H__
--- a/libclc/clc/include/clc/math/clc_half_sin.h
+++ b/libclc/clc/include/clc/math/clc_half_sin.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_HALF_SIN_H__
--- a/libclc/clc/include/clc/math/clc_half_sqrt.h
+++ b/libclc/clc/include/clc/math/clc_half_sqrt.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_HALF_SQRT_H__
--- a/libclc/clc/include/clc/math/clc_half_tan.h
+++ b/libclc/clc/include/clc/math/clc_half_tan.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_HALF_TAN_H__
--- a/libclc/clc/include/clc/math/clc_native_cos.h
+++ b/libclc/clc/include/clc/math/clc_native_cos.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_NATIVE_COS_H__
--- a/libclc/clc/include/clc/math/clc_native_divide.h
+++ b/libclc/clc/include/clc/math/clc_native_divide.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_NATIVE_DIVIDE_H__
--- a/libclc/clc/include/clc/math/clc_native_exp.h
+++ b/libclc/clc/include/clc/math/clc_native_exp.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_NATIVE_EXP_H__
--- a/libclc/clc/include/clc/math/clc_native_exp10.h
+++ b/libclc/clc/include/clc/math/clc_native_exp10.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_NATIVE_EXP10_H__
--- a/libclc/clc/include/clc/math/clc_native_exp2.h
+++ b/libclc/clc/include/clc/math/clc_native_exp2.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_NATIVE_EXP2_H__
--- a/libclc/clc/include/clc/math/clc_native_log.h
+++ b/libclc/clc/include/clc/math/clc_native_log.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_NATIVE_LOG_H__
--- a/libclc/clc/include/clc/math/clc_native_log10.h
+++ b/libclc/clc/include/clc/math/clc_native_log10.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_NATIVE_LOG10_H__
--- a/libclc/clc/include/clc/math/clc_native_log2.h
+++ b/libclc/clc/include/clc/math/clc_native_log2.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_NATIVE_LOG2_H__
--- a/libclc/clc/include/clc/math/clc_native_powr.h
+++ b/libclc/clc/include/clc/math/clc_native_powr.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_NATIVE_POWR_H__
--- a/libclc/clc/include/clc/math/clc_native_recip.h
+++ b/libclc/clc/include/clc/math/clc_native_recip.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_NATIVE_RECIP_H__
--- a/libclc/clc/include/clc/math/clc_native_rsqrt.h
+++ b/libclc/clc/include/clc/math/clc_native_rsqrt.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_NATIVE_RSQRT_H__
--- a/libclc/clc/include/clc/math/clc_native_sin.h
+++ b/libclc/clc/include/clc/math/clc_native_sin.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_NATIVE_SIN_H__
--- a/libclc/clc/include/clc/math/clc_native_sqrt.h
+++ b/libclc/clc/include/clc/math/clc_native_sqrt.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_NATIVE_SQRT_H__
--- a/libclc/clc/include/clc/math/clc_native_tan.h
+++ b/libclc/clc/include/clc/math/clc_native_tan.h
@@ -16,6 +16,5 @@
 #include <clc/math/gentype.inc>
 #undef __CLC_FUNCTION
 #undef __FLOAT_ONLY
 #endif // __CLC_MATH_CLC_NATIVE_TAN_H__
--- a/libclc/clc/include/clc/math/clc_sincos_helpers.h
+++ b/libclc/clc/include/clc/math/clc_sincos_helpers.h
@@ -14,13 +14,9 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #define __DOUBLE_ONLY
 #define __CLC_BODY <clc/math/clc_sincos_helpers_fp64.inc>
 #include <clc/math/gentype.inc>
 #undef __DOUBLE_ONLY
 #endif // __CLC_MATH_CLC_SINCOS_HELPERS_H__
--- a/libclc/clc/include/clc/math/gentype.inc
+++ b/libclc/clc/include/clc/math/gentype.inc
@@ -349,3 +349,7 @@
 #undef __CLC_AS_GENTYPE
 #undef __CLC_CONVERT_GENTYPE
 #undef __HALF_ONLY
 #undef __FLOAT_ONLY
 #undef __DOUBLE_ONLY
--- a/libclc/clc/include/clc/shared/binary_def_scalarize.inc
+++ b/libclc/clc/include/clc/shared/binary_def_scalarize.inc
@@ -0,0 +1,131 @@
 //===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include <clc/utils.h>
 // Template that emits component-by-component ("scalarized") definitions of a
 // two-argument FUNCTION. It is used as a __CLC_BODY for gentype.inc and only
 // produces code when __CLC_SCALAR is defined.
 //
 // Settings the including file may define beforehand (defaults applied below):
 //   FUNCTION           name of the overloads to define; no default here
 //   __IMPL_FUNCTION    callee applied to each component; defaults to FUNCTION
 //   __CLC_MIN_VECSIZE  smallest width to emit; defaults to 2. A value of 1
 //                      additionally emits the scalar overload, a value of 3
 //                      drops the 2-wide overload, larger values are rejected
 //   __CLC_DEF_SPEC     specifier put on each definition; defaults to _CLC_DEF
 //   __CLC_RET_TYPE, __CLC_ARG1_TYPE, __CLC_ARG2_TYPE
 //                      element types; each defaults to __CLC_GENTYPE
 // None of these settings are #undef'd by this file.
 #ifdef __CLC_SCALAR
 #ifndef __CLC_MIN_VECSIZE
 #define __CLC_MIN_VECSIZE 2
 #endif
 #ifndef __IMPL_FUNCTION
 #define __IMPL_FUNCTION FUNCTION
 #endif
 #ifndef __CLC_DEF_SPEC
 #define __CLC_DEF_SPEC _CLC_DEF
 #endif
 #ifndef __CLC_RET_TYPE
 #define __CLC_RET_TYPE __CLC_GENTYPE
 #endif
 #ifndef __CLC_ARG1_TYPE
 #define __CLC_ARG1_TYPE __CLC_GENTYPE
 #endif
 #ifndef __CLC_ARG2_TYPE
 #define __CLC_ARG2_TYPE __CLC_GENTYPE
 #endif
 // Scalar overload, emitted only on request.
 // NOTE(review): with the default __IMPL_FUNCTION (FUNCTION itself) this
 // definition would call itself, so an includer asking for width 1 presumably
 // has to provide a distinct __IMPL_FUNCTION -- confirm against callers.
 #if __CLC_MIN_VECSIZE == 1
 _CLC_OVERLOAD __CLC_DEF_SPEC __CLC_RET_TYPE FUNCTION(__CLC_ARG1_TYPE x,
                                                     __CLC_ARG2_TYPE y) {
  return __IMPL_FUNCTION(x, y);
 }
 #endif // __CLC_MIN_VECSIZE == 1
 // 2-wide overload. The *_TYPE2 helpers go through __CLC_XCONCAT so that the
 // element-type macros are expanded before the width is appended.
 #if __CLC_MIN_VECSIZE <= 2
 #define __CLC_RET_TYPE2 __CLC_XCONCAT(__CLC_RET_TYPE, 2)
 #define __CLC_ARG1_TYPE2 __CLC_XCONCAT(__CLC_ARG1_TYPE, 2)
 #define __CLC_ARG2_TYPE2 __CLC_XCONCAT(__CLC_ARG2_TYPE, 2)
 _CLC_OVERLOAD __CLC_DEF_SPEC __CLC_RET_TYPE2 FUNCTION(__CLC_ARG1_TYPE2 x,
                                                      __CLC_ARG2_TYPE2 y) {
  return (__CLC_RET_TYPE2)(__IMPL_FUNCTION(x.s0, y.s0),
                           __IMPL_FUNCTION(x.s1, y.s1));
 }
 #undef __CLC_RET_TYPE2
 #undef __CLC_ARG1_TYPE2
 #undef __CLC_ARG2_TYPE2
 #endif // __CLC_MIN_VECSIZE <= 2
 // Widths 3, 4, 8 and 16 below are always emitted, so a minimum above 3
 // cannot be honoured and is rejected at preprocessing time.
 #if __CLC_MIN_VECSIZE > 3
 #error "__CLC_MIN_VECSIZE > 3 isn't implemented"
 #endif
 // 3-wide overload.
 #define __CLC_RET_TYPE3 __CLC_XCONCAT(__CLC_RET_TYPE, 3)
 #define __CLC_ARG1_TYPE3 __CLC_XCONCAT(__CLC_ARG1_TYPE, 3)
 #define __CLC_ARG2_TYPE3 __CLC_XCONCAT(__CLC_ARG2_TYPE, 3)
 _CLC_OVERLOAD __CLC_DEF_SPEC __CLC_RET_TYPE3 FUNCTION(__CLC_ARG1_TYPE3 x,
                                                      __CLC_ARG2_TYPE3 y) {
  return (__CLC_RET_TYPE3)(__IMPL_FUNCTION(x.s0, y.s0),
                           __IMPL_FUNCTION(x.s1, y.s1),
                           __IMPL_FUNCTION(x.s2, y.s2));
 }
 #undef __CLC_RET_TYPE3
 #undef __CLC_ARG1_TYPE3
 #undef __CLC_ARG2_TYPE3
 // 4-wide overload.
 #define __CLC_RET_TYPE4 __CLC_XCONCAT(__CLC_RET_TYPE, 4)
 #define __CLC_ARG1_TYPE4 __CLC_XCONCAT(__CLC_ARG1_TYPE, 4)
 #define __CLC_ARG2_TYPE4 __CLC_XCONCAT(__CLC_ARG2_TYPE, 4)
 _CLC_OVERLOAD __CLC_DEF_SPEC __CLC_RET_TYPE4 FUNCTION(__CLC_ARG1_TYPE4 x,
                                                      __CLC_ARG2_TYPE4 y) {
  return (__CLC_RET_TYPE4)(__IMPL_FUNCTION(x.s0, y.s0),
                           __IMPL_FUNCTION(x.s1, y.s1),
                           __IMPL_FUNCTION(x.s2, y.s2),
                           __IMPL_FUNCTION(x.s3, y.s3));
 }
 #undef __CLC_RET_TYPE4
 #undef __CLC_ARG1_TYPE4
 #undef __CLC_ARG2_TYPE4
 // 8-wide overload.
 #define __CLC_RET_TYPE8 __CLC_XCONCAT(__CLC_RET_TYPE, 8)
 #define __CLC_ARG1_TYPE8 __CLC_XCONCAT(__CLC_ARG1_TYPE, 8)
 #define __CLC_ARG2_TYPE8 __CLC_XCONCAT(__CLC_ARG2_TYPE, 8)
 _CLC_OVERLOAD __CLC_DEF_SPEC __CLC_RET_TYPE8 FUNCTION(__CLC_ARG1_TYPE8 x,
                                                      __CLC_ARG2_TYPE8 y) {
  return (
      __CLC_RET_TYPE8)(__IMPL_FUNCTION(x.s0, y.s0), __IMPL_FUNCTION(x.s1, y.s1),
                       __IMPL_FUNCTION(x.s2, y.s2), __IMPL_FUNCTION(x.s3, y.s3),
                       __IMPL_FUNCTION(x.s4, y.s4), __IMPL_FUNCTION(x.s5, y.s5),
                       __IMPL_FUNCTION(x.s6, y.s6),
                       __IMPL_FUNCTION(x.s7, y.s7));
 }
 #undef __CLC_RET_TYPE8
 #undef __CLC_ARG1_TYPE8
 #undef __CLC_ARG2_TYPE8
 // 16-wide overload; components 10..15 are addressed as .sa .. .sf.
 #define __CLC_RET_TYPE16 __CLC_XCONCAT(__CLC_RET_TYPE, 16)
 #define __CLC_ARG1_TYPE16 __CLC_XCONCAT(__CLC_ARG1_TYPE, 16)
 #define __CLC_ARG2_TYPE16 __CLC_XCONCAT(__CLC_ARG2_TYPE, 16)
 _CLC_OVERLOAD __CLC_DEF_SPEC __CLC_RET_TYPE16 FUNCTION(__CLC_ARG1_TYPE16 x,
                                                       __CLC_ARG2_TYPE16 y) {
  return (__CLC_RET_TYPE16)(__IMPL_FUNCTION(x.s0, y.s0),
                            __IMPL_FUNCTION(x.s1, y.s1),
                            __IMPL_FUNCTION(x.s2, y.s2),
                            __IMPL_FUNCTION(x.s3, y.s3),
                            __IMPL_FUNCTION(x.s4, y.s4),
                            __IMPL_FUNCTION(x.s5, y.s5),
                            __IMPL_FUNCTION(x.s6, y.s6),
                            __IMPL_FUNCTION(x.s7, y.s7),
                            __IMPL_FUNCTION(x.s8, y.s8),
                            __IMPL_FUNCTION(x.s9, y.s9),
                            __IMPL_FUNCTION(x.sa, y.sa),
                            __IMPL_FUNCTION(x.sb, y.sb),
                            __IMPL_FUNCTION(x.sc, y.sc),
                            __IMPL_FUNCTION(x.sd, y.sd),
                            __IMPL_FUNCTION(x.se, y.se),
                            __IMPL_FUNCTION(x.sf, y.sf));
 }
 #undef __CLC_RET_TYPE16
 #undef __CLC_ARG1_TYPE16
 #undef __CLC_ARG2_TYPE16
 #endif // __CLC_SCALAR
--- a/libclc/clc/include/clc/shared/ternary_def_scalarize.inc
+++ b/libclc/clc/include/clc/shared/ternary_def_scalarize.inc
@@ -0,0 +1,153 @@
 //===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include <clc/utils.h>
 // Template that emits component-by-component ("scalarized") definitions of a
 // three-argument FUNCTION. It only produces code when __CLC_SCALAR is
 // defined.
 //
 // Settings the including file may define beforehand (defaults applied below):
 //   FUNCTION           name of the overloads to define; no default here
 //   __IMPL_FUNCTION    callee applied to each component; defaults to FUNCTION
 //   __CLC_MIN_VECSIZE  smallest width to emit; defaults to 2. A value of 1
 //                      additionally emits the scalar overload, a value of 3
 //                      drops the 2-wide overload, larger values are rejected
 //   __CLC_DEF_SPEC     specifier put on each definition; defaults to _CLC_DEF
 //   __CLC_RET_TYPE, __CLC_ARG1_TYPE, __CLC_ARG2_TYPE, __CLC_ARG3_TYPE
 //                      element types; each defaults to __CLC_GENTYPE
 // None of these settings are #undef'd by this file.
 #ifdef __CLC_SCALAR
 #ifndef __CLC_MIN_VECSIZE
 #define __CLC_MIN_VECSIZE 2
 #endif
 #ifndef __IMPL_FUNCTION
 #define __IMPL_FUNCTION FUNCTION
 #endif
 #ifndef __CLC_DEF_SPEC
 #define __CLC_DEF_SPEC _CLC_DEF
 #endif
 #ifndef __CLC_RET_TYPE
 #define __CLC_RET_TYPE __CLC_GENTYPE
 #endif
 #ifndef __CLC_ARG1_TYPE
 #define __CLC_ARG1_TYPE __CLC_GENTYPE
 #endif
 #ifndef __CLC_ARG2_TYPE
 #define __CLC_ARG2_TYPE __CLC_GENTYPE
 #endif
 #ifndef __CLC_ARG3_TYPE
 #define __CLC_ARG3_TYPE __CLC_GENTYPE
 #endif
 // Scalar overload, emitted only on request.
 // NOTE(review): with the default __IMPL_FUNCTION (FUNCTION itself) this
 // definition would call itself, so an includer asking for width 1 presumably
 // has to provide a distinct __IMPL_FUNCTION -- confirm against callers.
 #if __CLC_MIN_VECSIZE == 1
 _CLC_OVERLOAD __CLC_DEF_SPEC __CLC_RET_TYPE FUNCTION(__CLC_ARG1_TYPE x,
                                                     __CLC_ARG2_TYPE y,
                                                     __CLC_ARG3_TYPE z) {
  return __IMPL_FUNCTION(x, y, z);
 }
 #endif // __CLC_MIN_VECSIZE == 1
 // 2-wide overload. The *_TYPE2 helpers go through __CLC_XCONCAT so that the
 // element-type macros are expanded before the width is appended.
 #if __CLC_MIN_VECSIZE <= 2
 #define __CLC_RET_TYPE2 __CLC_XCONCAT(__CLC_RET_TYPE, 2)
 #define __CLC_ARG1_TYPE2 __CLC_XCONCAT(__CLC_ARG1_TYPE, 2)
 #define __CLC_ARG2_TYPE2 __CLC_XCONCAT(__CLC_ARG2_TYPE, 2)
 #define __CLC_ARG3_TYPE2 __CLC_XCONCAT(__CLC_ARG3_TYPE, 2)
 _CLC_OVERLOAD __CLC_DEF_SPEC __CLC_RET_TYPE2 FUNCTION(__CLC_ARG1_TYPE2 x,
                                                      __CLC_ARG2_TYPE2 y,
                                                      __CLC_ARG3_TYPE2 z) {
  return (__CLC_RET_TYPE2)(__IMPL_FUNCTION(x.s0, y.s0, z.s0),
                           __IMPL_FUNCTION(x.s1, y.s1, z.s1));
 }
 #undef __CLC_RET_TYPE2
 #undef __CLC_ARG1_TYPE2
 #undef __CLC_ARG2_TYPE2
 #undef __CLC_ARG3_TYPE2
 #endif // __CLC_MIN_VECSIZE <= 2
 // Widths 3, 4, 8 and 16 below are always emitted, so a minimum above 3
 // cannot be honoured and is rejected at preprocessing time.
 #if __CLC_MIN_VECSIZE > 3
 #error "__CLC_MIN_VECSIZE > 3 isn't implemented"
 #endif
 // 3-wide overload.
 #define __CLC_RET_TYPE3 __CLC_XCONCAT(__CLC_RET_TYPE, 3)
 #define __CLC_ARG1_TYPE3 __CLC_XCONCAT(__CLC_ARG1_TYPE, 3)
 #define __CLC_ARG2_TYPE3 __CLC_XCONCAT(__CLC_ARG2_TYPE, 3)
 #define __CLC_ARG3_TYPE3 __CLC_XCONCAT(__CLC_ARG3_TYPE, 3)
 _CLC_OVERLOAD __CLC_DEF_SPEC __CLC_RET_TYPE3 FUNCTION(__CLC_ARG1_TYPE3 x,
                                                      __CLC_ARG2_TYPE3 y,
                                                      __CLC_ARG3_TYPE3 z) {
  return (__CLC_RET_TYPE3)(__IMPL_FUNCTION(x.s0, y.s0, z.s0),
                           __IMPL_FUNCTION(x.s1, y.s1, z.s1),
                           __IMPL_FUNCTION(x.s2, y.s2, z.s2));
 }
 #undef __CLC_RET_TYPE3
 #undef __CLC_ARG1_TYPE3
 #undef __CLC_ARG2_TYPE3
 #undef __CLC_ARG3_TYPE3
 // 4-wide overload.
 #define __CLC_RET_TYPE4 __CLC_XCONCAT(__CLC_RET_TYPE, 4)
 #define __CLC_ARG1_TYPE4 __CLC_XCONCAT(__CLC_ARG1_TYPE, 4)
 #define __CLC_ARG2_TYPE4 __CLC_XCONCAT(__CLC_ARG2_TYPE, 4)
 #define __CLC_ARG3_TYPE4 __CLC_XCONCAT(__CLC_ARG3_TYPE, 4)
 _CLC_OVERLOAD __CLC_DEF_SPEC __CLC_RET_TYPE4 FUNCTION(__CLC_ARG1_TYPE4 x,
                                                      __CLC_ARG2_TYPE4 y,
                                                      __CLC_ARG3_TYPE4 z) {
  return (__CLC_RET_TYPE4)(__IMPL_FUNCTION(x.s0, y.s0, z.s0),
                           __IMPL_FUNCTION(x.s1, y.s1, z.s1),
                           __IMPL_FUNCTION(x.s2, y.s2, z.s2),
                           __IMPL_FUNCTION(x.s3, y.s3, z.s3));
 }
 #undef __CLC_RET_TYPE4
 #undef __CLC_ARG1_TYPE4
 #undef __CLC_ARG2_TYPE4
 #undef __CLC_ARG3_TYPE4
 // 8-wide overload.
 #define __CLC_RET_TYPE8 __CLC_XCONCAT(__CLC_RET_TYPE, 8)
 #define __CLC_ARG1_TYPE8 __CLC_XCONCAT(__CLC_ARG1_TYPE, 8)
 #define __CLC_ARG2_TYPE8 __CLC_XCONCAT(__CLC_ARG2_TYPE, 8)
 #define __CLC_ARG3_TYPE8 __CLC_XCONCAT(__CLC_ARG3_TYPE, 8)
 _CLC_OVERLOAD __CLC_DEF_SPEC __CLC_RET_TYPE8 FUNCTION(__CLC_ARG1_TYPE8 x,
                                                      __CLC_ARG2_TYPE8 y,
                                                      __CLC_ARG3_TYPE8 z) {
  return (__CLC_RET_TYPE8)(__IMPL_FUNCTION(x.s0, y.s0, z.s0),
                           __IMPL_FUNCTION(x.s1, y.s1, z.s1),
                           __IMPL_FUNCTION(x.s2, y.s2, z.s2),
                           __IMPL_FUNCTION(x.s3, y.s3, z.s3),
                           __IMPL_FUNCTION(x.s4, y.s4, z.s4),
                           __IMPL_FUNCTION(x.s5, y.s5, z.s5),
                           __IMPL_FUNCTION(x.s6, y.s6, z.s6),
                           __IMPL_FUNCTION(x.s7, y.s7, z.s7));
 }
 #undef __CLC_RET_TYPE8
 #undef __CLC_ARG1_TYPE8
 #undef __CLC_ARG2_TYPE8
 #undef __CLC_ARG3_TYPE8
 // 16-wide overload; components 10..15 are addressed as .sa .. .sf.
 #define __CLC_RET_TYPE16 __CLC_XCONCAT(__CLC_RET_TYPE, 16)
 #define __CLC_ARG1_TYPE16 __CLC_XCONCAT(__CLC_ARG1_TYPE, 16)
 #define __CLC_ARG2_TYPE16 __CLC_XCONCAT(__CLC_ARG2_TYPE, 16)
 #define __CLC_ARG3_TYPE16 __CLC_XCONCAT(__CLC_ARG3_TYPE, 16)
 _CLC_OVERLOAD __CLC_DEF_SPEC __CLC_RET_TYPE16 FUNCTION(__CLC_ARG1_TYPE16 x,
                                                       __CLC_ARG2_TYPE16 y,
                                                       __CLC_ARG3_TYPE16 z) {
  return (__CLC_RET_TYPE16)(__IMPL_FUNCTION(x.s0, y.s0, z.s0),
                            __IMPL_FUNCTION(x.s1, y.s1, z.s1),
                            __IMPL_FUNCTION(x.s2, y.s2, z.s2),
                            __IMPL_FUNCTION(x.s3, y.s3, z.s3),
                            __IMPL_FUNCTION(x.s4, y.s4, z.s4),
                            __IMPL_FUNCTION(x.s5, y.s5, z.s5),
                            __IMPL_FUNCTION(x.s6, y.s6, z.s6),
                            __IMPL_FUNCTION(x.s7, y.s7, z.s7),
                            __IMPL_FUNCTION(x.s8, y.s8, z.s8),
                            __IMPL_FUNCTION(x.s9, y.s9, z.s9),
                            __IMPL_FUNCTION(x.sa, y.sa, z.sa),
                            __IMPL_FUNCTION(x.sb, y.sb, z.sb),
                            __IMPL_FUNCTION(x.sc, y.sc, z.sc),
                            __IMPL_FUNCTION(x.sd, y.sd, z.sd),
                            __IMPL_FUNCTION(x.se, y.se, z.se),
                            __IMPL_FUNCTION(x.sf, y.sf, z.sf));
 }
 #undef __CLC_RET_TYPE16
 #undef __CLC_ARG1_TYPE16
 #undef __CLC_ARG2_TYPE16
 #undef __CLC_ARG3_TYPE16
 #endif // __CLC_SCALAR
--- a/libclc/clc/include/clc/shared/unary_def_scalarize.inc
+++ b/libclc/clc/include/clc/shared/unary_def_scalarize.inc
@@ -0,0 +1,97 @@
 //===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include <clc/utils.h>
 // Template that emits component-by-component ("scalarized") definitions of a
 // one-argument FUNCTION. It only produces code when __CLC_SCALAR is defined.
 //
 // Settings the including file may define beforehand (defaults applied below):
 //   FUNCTION           name of the overloads to define; no default here
 //   __IMPL_FUNCTION    callee applied to each component; defaults to FUNCTION
 //   __CLC_MIN_VECSIZE  smallest width to emit; defaults to 2. A value of 1
 //                      additionally emits the scalar overload, a value of 3
 //                      drops the 2-wide overload, larger values are rejected
 //   __CLC_RET_TYPE, __CLC_ARG1_TYPE
 //                      element types; each defaults to __CLC_GENTYPE
 // None of these settings are #undef'd by this file.
 //
 // NOTE(review): every definition below uses _CLC_DEF directly; there is no
 // __CLC_DEF_SPEC override here, unlike binary_def_scalarize.inc and
 // ternary_def_scalarize.inc. Confirm whether that difference is intended.
 #ifdef __CLC_SCALAR
 #ifndef __CLC_MIN_VECSIZE
 #define __CLC_MIN_VECSIZE 2
 #endif
 #ifndef __IMPL_FUNCTION
 #define __IMPL_FUNCTION FUNCTION
 #endif
 #ifndef __CLC_RET_TYPE
 #define __CLC_RET_TYPE __CLC_GENTYPE
 #endif
 #ifndef __CLC_ARG1_TYPE
 #define __CLC_ARG1_TYPE __CLC_GENTYPE
 #endif
 // NOTE(review): __CLC_ARG2_TYPE receives a default here but nothing in this
 // file reads it; it looks like a leftover from the binary template.
 #ifndef __CLC_ARG2_TYPE
 #define __CLC_ARG2_TYPE __CLC_GENTYPE
 #endif
 // Scalar overload, emitted only on request.
 // NOTE(review): with the default __IMPL_FUNCTION (FUNCTION itself) this
 // definition would call itself, so an includer asking for width 1 presumably
 // has to provide a distinct __IMPL_FUNCTION -- confirm against callers.
 #if __CLC_MIN_VECSIZE == 1
 _CLC_OVERLOAD _CLC_DEF __CLC_RET_TYPE FUNCTION(__CLC_ARG1_TYPE x) {
  return __IMPL_FUNCTION(x);
 }
 #endif // __CLC_MIN_VECSIZE == 1
 // 2-wide overload. The *_TYPE2 helpers go through __CLC_XCONCAT so that the
 // element-type macros are expanded before the width is appended.
 #if __CLC_MIN_VECSIZE <= 2
 #define __CLC_RET_TYPE2 __CLC_XCONCAT(__CLC_RET_TYPE, 2)
 #define __CLC_ARG1_TYPE2 __CLC_XCONCAT(__CLC_ARG1_TYPE, 2)
 _CLC_OVERLOAD _CLC_DEF __CLC_RET_TYPE2 FUNCTION(__CLC_ARG1_TYPE2 x) {
  return (__CLC_RET_TYPE2)(__IMPL_FUNCTION(x.s0), __IMPL_FUNCTION(x.s1));
 }
 #undef __CLC_RET_TYPE2
 #undef __CLC_ARG1_TYPE2
 #endif // __CLC_MIN_VECSIZE <= 2
 // Widths 3, 4, 8 and 16 below are always emitted, so a minimum above 3
 // cannot be honoured and is rejected at preprocessing time.
 #if __CLC_MIN_VECSIZE > 3
 #error "__CLC_MIN_VECSIZE > 3 isn't implemented"
 #endif
 // 3-wide overload.
 #define __CLC_RET_TYPE3 __CLC_XCONCAT(__CLC_RET_TYPE, 3)
 #define __CLC_ARG1_TYPE3 __CLC_XCONCAT(__CLC_ARG1_TYPE, 3)
 _CLC_OVERLOAD _CLC_DEF __CLC_RET_TYPE3 FUNCTION(__CLC_ARG1_TYPE3 x) {
  return (__CLC_RET_TYPE3)(__IMPL_FUNCTION(x.s0), __IMPL_FUNCTION(x.s1),
                           __IMPL_FUNCTION(x.s2));
 }
 #undef __CLC_RET_TYPE3
 #undef __CLC_ARG1_TYPE3
 // 4-wide overload.
 #define __CLC_RET_TYPE4 __CLC_XCONCAT(__CLC_RET_TYPE, 4)
 #define __CLC_ARG1_TYPE4 __CLC_XCONCAT(__CLC_ARG1_TYPE, 4)
 _CLC_OVERLOAD _CLC_DEF __CLC_RET_TYPE4 FUNCTION(__CLC_ARG1_TYPE4 x) {
  return (__CLC_RET_TYPE4)(__IMPL_FUNCTION(x.s0), __IMPL_FUNCTION(x.s1),
                           __IMPL_FUNCTION(x.s2), __IMPL_FUNCTION(x.s3));
 }
 #undef __CLC_RET_TYPE4
 #undef __CLC_ARG1_TYPE4
 // 8-wide overload.
 #define __CLC_RET_TYPE8 __CLC_XCONCAT(__CLC_RET_TYPE, 8)
 #define __CLC_ARG1_TYPE8 __CLC_XCONCAT(__CLC_ARG1_TYPE, 8)
 _CLC_OVERLOAD _CLC_DEF __CLC_RET_TYPE8 FUNCTION(__CLC_ARG1_TYPE8 x) {
  return (__CLC_RET_TYPE8)(__IMPL_FUNCTION(x.s0), __IMPL_FUNCTION(x.s1),
                           __IMPL_FUNCTION(x.s2), __IMPL_FUNCTION(x.s3),
                           __IMPL_FUNCTION(x.s4), __IMPL_FUNCTION(x.s5),
                           __IMPL_FUNCTION(x.s6), __IMPL_FUNCTION(x.s7));
 }
 #undef __CLC_RET_TYPE8
 #undef __CLC_ARG1_TYPE8
 // 16-wide overload; components 10..15 are addressed as .sa .. .sf.
 #define __CLC_RET_TYPE16 __CLC_XCONCAT(__CLC_RET_TYPE, 16)
 #define __CLC_ARG1_TYPE16 __CLC_XCONCAT(__CLC_ARG1_TYPE, 16)
 _CLC_OVERLOAD _CLC_DEF __CLC_RET_TYPE16 FUNCTION(__CLC_ARG1_TYPE16 x) {
  return (__CLC_RET_TYPE16)(__IMPL_FUNCTION(x.s0), __IMPL_FUNCTION(x.s1),
                            __IMPL_FUNCTION(x.s2), __IMPL_FUNCTION(x.s3),
                            __IMPL_FUNCTION(x.s4), __IMPL_FUNCTION(x.s5),
                            __IMPL_FUNCTION(x.s6), __IMPL_FUNCTION(x.s7),
                            __IMPL_FUNCTION(x.s8), __IMPL_FUNCTION(x.s9),
                            __IMPL_FUNCTION(x.sa), __IMPL_FUNCTION(x.sb),
                            __IMPL_FUNCTION(x.sc), __IMPL_FUNCTION(x.sd),
                            __IMPL_FUNCTION(x.se), __IMPL_FUNCTION(x.sf));
 }
 #undef __CLC_RET_TYPE16
 #undef __CLC_ARG1_TYPE16
 #endif // __CLC_SCALAR
--- a/libclc/clc/lib/amdgcn/math/clc_fmax.cl
+++ b/libclc/clc/lib/amdgcn/math/clc_fmax.cl
@@ -18,7 +18,6 @@ _CLC_DEF _CLC_OVERLOAD float __clc_fmax(float x, float y) {
  y = __builtin_canonicalizef(y);
  return __builtin_fmaxf(x, y);
 }
 _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_fmax, float, float)
 #ifdef cl_khr_fp64
@@ -29,8 +28,6 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmax(double x, double y) {
  y = __builtin_canonicalize(y);
  return __builtin_fmax(x, y);
 }
 _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_fmax, double,
                      double)
 #endif
 #ifdef cl_khr_fp16
@@ -44,6 +41,9 @@ _CLC_DEF _CLC_OVERLOAD half __clc_fmax(half x, half y) {
    return x;
  return (y < x) ? x : y;
 }
 _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_fmax, half, half)
 #endif
 #define FUNCTION __clc_fmax
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
--- a/libclc/clc/lib/amdgcn/math/clc_fmin.cl
+++ b/libclc/clc/lib/amdgcn/math/clc_fmin.cl
@@ -18,7 +18,6 @@ _CLC_DEF _CLC_OVERLOAD float __clc_fmin(float x, float y) {
  y = __builtin_canonicalizef(y);
  return __builtin_fminf(x, y);
 }
 _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_fmin, float, float)
 #ifdef cl_khr_fp64
@@ -29,8 +28,6 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmin(double x, double y) {
  y = __builtin_canonicalize(y);
  return __builtin_fmin(x, y);
 }
 _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_fmin, double,
                      double)
 #endif
@@ -45,6 +42,9 @@ _CLC_DEF _CLC_OVERLOAD half __clc_fmin(half x, half y) {
    return x;
  return (y < x) ? y : x;
 }
 _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_fmin, half, half)
 #endif
 #define FUNCTION __clc_fmin
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
--- a/libclc/clc/lib/amdgcn/math/clc_ldexp_override.cl
+++ b/libclc/clc/lib/amdgcn/math/clc_ldexp_override.cl
@@ -10,14 +10,25 @@
 #include <clc/internal/clc.h>
 #include <clc/math/clc_ldexp.h>
 #define FUNCTION __clc_ldexp
 #define __CLC_ARG2_TYPE int
 #define __CLC_MIN_VECSIZE 1
 #ifdef __HAS_LDEXPF__
 // This defines all the ldexp(floatN, intN) variants.
-_CLC_DEFINE_BINARY_BUILTIN(float, __clc_ldexp, __builtin_amdgcn_ldexpf, float, int);
+#define __FLOAT_ONLY
 #define __IMPL_FUNCTION __builtin_amdgcn_ldexpf
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef __IMPL_FUNCTION
 #endif
 #ifdef cl_khr_fp64
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 // This defines all the ldexp(doubleN, intN) variants.
-_CLC_DEFINE_BINARY_BUILTIN(double, __clc_ldexp, __builtin_amdgcn_ldexp, double,
+#define __DOUBLE_ONLY
-                           int);
+#define __IMPL_FUNCTION __builtin_amdgcn_ldexp
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef __IMPL_FUNCTION
 #endif
--- a/libclc/clc/lib/amdgpu/math/clc_native_exp2.cl
+++ b/libclc/clc/lib/amdgpu/math/clc_native_exp2.cl
@@ -9,8 +9,9 @@
 #include <clc/clcmacro.h>
 #include <clc/internal/clc.h>
-_CLC_OVERLOAD _CLC_DEF float __clc_native_exp2(float val) {
+#define __FLOAT_ONLY
-  return __builtin_amdgcn_exp2f(val);
+#define __CLC_MIN_VECSIZE 1
-}
+#define FUNCTION __clc_native_exp2
-
+#define __IMPL_FUNCTION __builtin_amdgcn_exp2f
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_native_exp2, float)
+#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
--- a/libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl
+++ b/libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl
@@ -43,6 +43,9 @@ _CLC_OVERLOAD _CLC_DEF double __clc_sqrt(double x) {
  return (x == __builtin_inf() || (x == 0.0)) ? v01 : v23;
 }
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_sqrt, double);
+#define __DOUBLE_ONLY
 #define FUNCTION __clc_sqrt
 #define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #endif
--- a/libclc/clc/lib/clspv/math/clc_sw_fma.cl
+++ b/libclc/clc/lib/clspv/math/clc_sw_fma.cl
@@ -269,5 +269,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) {
                        ((uint)st_fma.mantissa.lo & 0x7fffff));
 }
-_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_sw_fma, float,
+#define __FLOAT_ONLY
-                       float, float)
+#define FUNCTION __clc_sw_fma
 #define __CLC_BODY <clc/shared/ternary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
--- a/libclc/clc/lib/generic/integer/clc_clz.cl
+++ b/libclc/clc/lib/generic/integer/clc_clz.cl
@@ -42,11 +42,6 @@ _CLC_OVERLOAD _CLC_DEF ulong __clc_clz(ulong x) {
  return x ? __builtin_clzl(x) : 64;
 }
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, __clc_clz, char)
+#define FUNCTION __clc_clz
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, __clc_clz, uchar)
+#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, __clc_clz, short)
+#include <clc/integer/gentype.inc>
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, __clc_clz, ushort)
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, __clc_clz, int)
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, __clc_clz, uint)
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, __clc_clz, long)
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, __clc_clz, ulong)
--- a/libclc/clc/lib/generic/integer/clc_ctz.cl
+++ b/libclc/clc/lib/generic/integer/clc_ctz.cl
@@ -38,11 +38,6 @@ _CLC_OVERLOAD _CLC_DEF ulong __clc_ctz(ulong x) {
  return __builtin_ctzg(x, 64);
 }
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, __clc_ctz, char)
+#define FUNCTION __clc_ctz
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, __clc_ctz, uchar)
+#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, __clc_ctz, short)
+#include <clc/integer/gentype.inc>
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, __clc_ctz, ushort)
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, __clc_ctz, int)
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, __clc_ctz, uint)
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, __clc_ctz, long)
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, __clc_ctz, ulong)
--- a/libclc/clc/lib/generic/math/clc_erf.cl
+++ b/libclc/clc/lib/generic/math/clc_erf.cl
@@ -211,7 +211,11 @@ _CLC_OVERLOAD _CLC_DEF float __clc_erf(float x) {
  return ret;
 }
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_erf, float);
+#define __FLOAT_ONLY
 #define FUNCTION __clc_erf
 #define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef FUNCTION
 #ifdef cl_khr_fp64
@@ -496,7 +500,11 @@ _CLC_OVERLOAD _CLC_DEF double __clc_erf(double y) {
  return y < 0.0 ? -ret : ret;
 }
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_erf, double);
+#define __DOUBLE_ONLY
 #define FUNCTION __clc_erf
 #define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef FUNCTION
 #endif
--- a/libclc/clc/lib/generic/math/clc_erfc.cl
+++ b/libclc/clc/lib/generic/math/clc_erfc.cl
@@ -211,7 +211,11 @@ _CLC_OVERLOAD _CLC_DEF float __clc_erfc(float x) {
  return ret;
 }
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_erfc, float);
+#define __FLOAT_ONLY
 #define FUNCTION __clc_erfc
 #define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef FUNCTION
 #ifdef cl_khr_fp64
@@ -505,7 +509,11 @@ _CLC_OVERLOAD _CLC_DEF double __clc_erfc(double x) {
  return ret;
 }
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_erfc, double);
+#define __DOUBLE_ONLY
 #define FUNCTION __clc_erfc
 #define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef FUNCTION
 #endif
--- a/libclc/clc/lib/generic/math/clc_fmax.cl
+++ b/libclc/clc/lib/generic/math/clc_fmax.cl
@@ -10,13 +10,29 @@
 #include <clc/internal/clc.h>
 #include <clc/relational/clc_isnan.h>
-_CLC_DEFINE_BINARY_BUILTIN(float, __clc_fmax, __builtin_fmaxf, float, float);
+#define __FLOAT_ONLY
 #define __CLC_MIN_VECSIZE 1
 #define FUNCTION __clc_fmax
 #define __IMPL_FUNCTION __builtin_fmaxf
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef __CLC_MIN_VECSIZE
 #undef FUNCTION
 #undef __IMPL_FUNCTION
 #ifdef cl_khr_fp64
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
-_CLC_DEFINE_BINARY_BUILTIN(double, __clc_fmax, __builtin_fmax, double, double);
+#define __DOUBLE_ONLY
 #define __CLC_MIN_VECSIZE 1
 #define FUNCTION __clc_fmax
 #define __IMPL_FUNCTION __builtin_fmax
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef __CLC_MIN_VECSIZE
 #undef FUNCTION
 #undef __IMPL_FUNCTION
 #endif
@@ -31,6 +47,12 @@ _CLC_DEF _CLC_OVERLOAD half __clc_fmax(half x, half y) {
    return x;
  return (x < y) ? y : x;
 }
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_fmax, half, half)
+
 #define __HALF_ONLY
 #define __CLC_SUPPORTED_VECSIZE_OR_1 2
 #define FUNCTION __clc_fmax
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef FUNCTION
 #endif
--- a/libclc/clc/lib/generic/math/clc_fmin.cl
+++ b/libclc/clc/lib/generic/math/clc_fmin.cl
@@ -10,13 +10,29 @@
 #include <clc/internal/clc.h>
 #include <clc/relational/clc_isnan.h>
-_CLC_DEFINE_BINARY_BUILTIN(float, __clc_fmin, __builtin_fminf, float, float);
+#define __FLOAT_ONLY
 #define __CLC_MIN_VECSIZE 1
 #define FUNCTION __clc_fmin
 #define __IMPL_FUNCTION __builtin_fminf
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef __CLC_MIN_VECSIZE
 #undef FUNCTION
 #undef __IMPL_FUNCTION
 #ifdef cl_khr_fp64
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
-_CLC_DEFINE_BINARY_BUILTIN(double, __clc_fmin, __builtin_fmin, double, double);
+#define __DOUBLE_ONLY
 #define __CLC_MIN_VECSIZE 1
 #define FUNCTION __clc_fmin
 #define __IMPL_FUNCTION __builtin_fmin
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef __CLC_MIN_VECSIZE
 #undef FUNCTION
 #undef __IMPL_FUNCTION
 #endif
@@ -31,6 +47,11 @@ _CLC_DEF _CLC_OVERLOAD half __clc_fmin(half x, half y) {
    return x;
  return (y < x) ? y : x;
 }
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_fmin, half, half)
+
 #define __HALF_ONLY
 #define __CLC_SUPPORTED_VECSIZE_OR_1 2
 #define FUNCTION __clc_fmin
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #endif
--- a/libclc/clc/lib/generic/math/clc_fmod.cl
+++ b/libclc/clc/lib/generic/math/clc_fmod.cl
@@ -63,7 +63,12 @@ _CLC_DEF _CLC_OVERLOAD float __clc_fmod(float x, float y) {
  return xr;
 }
-_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_fmod, float, float);
+
 #define __FLOAT_ONLY
 #define FUNCTION __clc_fmod
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef FUNCTION
 #ifdef cl_khr_fp64
@@ -170,8 +175,13 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) {
  return ret;
 }
-_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_fmod, double,
+
-                      double);
+#define __DOUBLE_ONLY
 #define FUNCTION __clc_fmod
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef FUNCTION
 #endif
 #ifdef cl_khr_fp16
--- a/libclc/clc/lib/generic/math/clc_ldexp.cl
+++ b/libclc/clc/lib/generic/math/clc_ldexp.cl
@@ -86,8 +86,6 @@ _CLC_DEF_ldexp _CLC_OVERLOAD float __clc_ldexp(float x, int n) {
  return val_f;
 }
 _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF_ldexp, float, __clc_ldexp, float, int);
 #ifdef cl_khr_fp64
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
@@ -121,8 +119,6 @@ _CLC_DEF_ldexp _CLC_OVERLOAD double __clc_ldexp(double x, int n) {
  return mr;
 }
 _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF_ldexp, double, __clc_ldexp, double, int);
 #endif
 #ifdef cl_khr_fp16
@@ -133,6 +129,10 @@ _CLC_OVERLOAD _CLC_DEF_ldexp half __clc_ldexp(half x, int n) {
  return (half)__clc_ldexp((float)x, n);
 }
 _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF_ldexp, half, __clc_ldexp, half, int);
 #endif
 #define FUNCTION __clc_ldexp
 #define __CLC_DEF_SPEC _CLC_DEF_ldexp
 #define __CLC_ARG2_TYPE int
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
--- a/libclc/clc/lib/generic/math/clc_log.cl
+++ b/libclc/clc/lib/generic/math/clc_log.cl
@@ -19,8 +19,6 @@ _CLC_OVERLOAD _CLC_DEF float __clc_log(float x) {
  return __clc_log2(x) * (1.0f / M_LOG2E_F);
 }
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_log, float);
 #ifdef cl_khr_fp64
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
@@ -29,8 +27,6 @@ _CLC_OVERLOAD _CLC_DEF double __clc_log(double x) {
  return __clc_log2(x) * (1.0 / M_LOG2E);
 }
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_log, double);
 #endif // cl_khr_fp64
 #ifdef cl_khr_fp16
@@ -41,6 +37,8 @@ _CLC_OVERLOAD _CLC_DEF half __clc_log(half x) {
  return (half)__clc_log2((float)x) * (1.0h / M_LOG2E_H);
 }
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_log, half);
 #endif // cl_khr_fp16
 #define FUNCTION __clc_log
 #define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
--- a/libclc/clc/lib/generic/math/clc_log10.cl
+++ b/libclc/clc/lib/generic/math/clc_log10.cl
@@ -22,12 +22,6 @@
 #include "clc_log_base.h"
 #undef COMPILING_LOG10
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_log10, float);
+#define FUNCTION __clc_log10
-
+#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
-#ifdef cl_khr_fp64
+#include <clc/math/gentype.inc>
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_log10, double);
 #endif // cl_khr_fp64
 #ifdef cl_khr_fp16
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_log10, half);
 #endif // cl_khr_fp16
--- a/libclc/clc/lib/generic/math/clc_log2.cl
+++ b/libclc/clc/lib/generic/math/clc_log2.cl
@@ -22,12 +22,6 @@
 #include "clc_log_base.h"
 #undef COMPILING_LOG2
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_log2, float);
+#define FUNCTION __clc_log2
-
+#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
-#ifdef cl_khr_fp64
+#include <clc/math/gentype.inc>
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_log2, double);
 #endif // cl_khr_fp64
 #ifdef cl_khr_fp16
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_log2, half);
 #endif // cl_khr_fp16
--- a/libclc/clc/lib/generic/math/clc_remainder.cl
+++ b/libclc/clc/lib/generic/math/clc_remainder.cl
@@ -73,8 +73,12 @@ _CLC_DEF _CLC_OVERLOAD float __clc_remainder(float x, float y) {
  return xr;
 }
-_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_remainder, float,
+
-                      float);
+#define __FLOAT_ONLY
 #define FUNCTION __clc_remainder
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef FUNCTION
 #ifdef cl_khr_fp64
@@ -207,8 +211,13 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) {
  return ret;
 }
-_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_remainder, double,
+
-                      double);
+#define __DOUBLE_ONLY
 #define FUNCTION __clc_remainder
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef FUNCTION
 #endif
 #ifdef cl_khr_fp16
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
@@ -32,8 +32,6 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #ifdef cl_khr_fp64
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
@@ -52,6 +50,4 @@
 #include <clc/math/gentype.inc>
 #undef __DOUBLE_ONLY
 #endif
--- a/libclc/clc/lib/generic/math/clc_sw_fma.cl
+++ b/libclc/clc/lib/generic/math/clc_sw_fma.cl
@@ -160,5 +160,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) {
                        ((uint)st_fma.mantissa & 0x7fffff));
 }
-_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_sw_fma, float,
+#define __FLOAT_ONLY
-                       float, float)
+#define FUNCTION __clc_sw_fma
 #define __CLC_BODY <clc/shared/ternary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
--- a/libclc/clc/lib/generic/math/clc_tgamma.cl
+++ b/libclc/clc/lib/generic/math/clc_tgamma.cl
@@ -32,7 +32,11 @@ _CLC_OVERLOAD _CLC_DEF float __clc_tgamma(float x) {
  return g;
 }
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_tgamma, float);
+#define __FLOAT_ONLY
 #define FUNCTION __clc_tgamma
 #define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef FUNCTION
 #ifdef cl_khr_fp64
@@ -55,7 +59,11 @@ _CLC_OVERLOAD _CLC_DEF double __clc_tgamma(double x) {
  return g;
 }
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_tgamma, double);
+#define __DOUBLE_ONLY
 #define FUNCTION __clc_tgamma
 #define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef FUNCTION
 #endif
--- a/libclc/clc/lib/r600/math/clc_fmax.cl
+++ b/libclc/clc/lib/r600/math/clc_fmax.cl
@@ -17,7 +17,12 @@ _CLC_DEF _CLC_OVERLOAD float __clc_fmax(float x, float y) {
  y = __clc_flush_denormal_if_not_supported(y);
  return __builtin_fmaxf(x, y);
 }
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_fmax, float, float)
+
 #define __FLOAT_ONLY
 #define FUNCTION __clc_fmax
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef FUNCTION
 #ifdef cl_khr_fp64
@@ -26,7 +31,11 @@ _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_fmax, float, float)
 _CLC_DEF _CLC_OVERLOAD double __clc_fmax(double x, double y) {
  return __builtin_fmax(x, y);
 }
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_fmax, double,
+
-                      double)
+#define __DOUBLE_ONLY
 #define FUNCTION __clc_fmax
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef FUNCTION
 #endif
--- a/libclc/clc/lib/r600/math/clc_fmin.cl
+++ b/libclc/clc/lib/r600/math/clc_fmin.cl
@@ -18,7 +18,12 @@ _CLC_DEF _CLC_OVERLOAD float __clc_fmin(float x, float y) {
  y = __clc_flush_denormal_if_not_supported(y);
  return __builtin_fminf(x, y);
 }
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_fmin, float, float)
+
 #define __FLOAT_ONLY
 #define FUNCTION __clc_fmin
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef FUNCTION
 #ifdef cl_khr_fp64
@@ -27,7 +32,11 @@ _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_fmin, float, float)
 _CLC_DEF _CLC_OVERLOAD double __clc_fmin(double x, double y) {
  return __builtin_fmin(x, y);
 }
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_fmin, double,
+
-                      double)
+#define __DOUBLE_ONLY
 #define FUNCTION __clc_fmin
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef FUNCTION
 #endif
--- a/libclc/clc/lib/r600/math/clc_native_rsqrt.cl
+++ b/libclc/clc/lib/r600/math/clc_native_rsqrt.cl
@@ -13,4 +13,7 @@ _CLC_OVERLOAD _CLC_DEF float __clc_native_rsqrt(float x) {
  return __builtin_r600_recipsqrt_ieeef(x);
 }
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_native_rsqrt, float);
+#define __FLOAT_ONLY
 #define FUNCTION __clc_native_rsqrt
 #define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
--- a/libclc/clc/lib/r600/math/clc_rsqrt_override.cl
+++ b/libclc/clc/lib/r600/math/clc_rsqrt_override.cl
@@ -13,7 +13,11 @@ _CLC_OVERLOAD _CLC_DEF float __clc_rsqrt(float x) {
  return __builtin_r600_recipsqrt_ieeef(x);
 }
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_rsqrt, float);
+#define __FLOAT_ONLY
 #define FUNCTION __clc_rsqrt
 #define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef FUNCTION
 #ifdef cl_khr_fp64
@@ -23,6 +27,10 @@ _CLC_OVERLOAD _CLC_DEF double __clc_rsqrt(double x) {
  return __builtin_r600_recipsqrt_ieee(x);
 }
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_rsqrt, double);
+#define __DOUBLE_ONLY
 #define FUNCTION __clc_rsqrt
 #define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
 #undef FUNCTION
 #endif
--- a/libclc/opencl/include/clc/opencl/geometric/fast_distance.h
+++ b/libclc/opencl/include/clc/opencl/geometric/fast_distance.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/geometric/fast_length.h
+++ b/libclc/opencl/include/clc/opencl/geometric/fast_length.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/geometric/fast_normalize.h
+++ b/libclc/opencl/include/clc/opencl/geometric/fast_normalize.h
@@ -15,4 +15,3 @@
 #undef __CLC_FUNCTION
 #undef __CLC_GEOMETRIC_RET_GENTYPE
 #undef __FLOAT_ONLY
--- a/libclc/opencl/include/clc/opencl/math/half_cos.h
+++ b/libclc/opencl/include/clc/opencl/math/half_cos.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/half_exp.h
+++ b/libclc/opencl/include/clc/opencl/math/half_exp.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/half_exp10.h
+++ b/libclc/opencl/include/clc/opencl/math/half_exp10.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/half_exp2.h
+++ b/libclc/opencl/include/clc/opencl/math/half_exp2.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/half_log.h
+++ b/libclc/opencl/include/clc/opencl/math/half_log.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/half_log10.h
+++ b/libclc/opencl/include/clc/opencl/math/half_log10.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/half_log2.h
+++ b/libclc/opencl/include/clc/opencl/math/half_log2.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/half_recip.h
+++ b/libclc/opencl/include/clc/opencl/math/half_recip.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/half_rsqrt.h
+++ b/libclc/opencl/include/clc/opencl/math/half_rsqrt.h
@@ -10,5 +10,4 @@
 #define __CLC_FUNCTION half_rsqrt
 #define __FLOAT_ONLY
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/half_sin.h
+++ b/libclc/opencl/include/clc/opencl/math/half_sin.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/half_sqrt.h
+++ b/libclc/opencl/include/clc/opencl/math/half_sqrt.h
@@ -10,5 +10,4 @@
 #define __CLC_FUNCTION half_sqrt
 #define __FLOAT_ONLY
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/half_tan.h
+++ b/libclc/opencl/include/clc/opencl/math/half_tan.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/native_cos.h
+++ b/libclc/opencl/include/clc/opencl/math/native_cos.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/native_exp.h
+++ b/libclc/opencl/include/clc/opencl/math/native_exp.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/native_exp10.h
+++ b/libclc/opencl/include/clc/opencl/math/native_exp10.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/native_exp2.h
+++ b/libclc/opencl/include/clc/opencl/math/native_exp2.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/native_log.h
+++ b/libclc/opencl/include/clc/opencl/math/native_log.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/native_log10.h
+++ b/libclc/opencl/include/clc/opencl/math/native_log10.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/native_log2.h
+++ b/libclc/opencl/include/clc/opencl/math/native_log2.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/native_recip.h
+++ b/libclc/opencl/include/clc/opencl/math/native_recip.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/native_rsqrt.h
+++ b/libclc/opencl/include/clc/opencl/math/native_rsqrt.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/native_sin.h
+++ b/libclc/opencl/include/clc/opencl/math/native_sin.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/native_sqrt.h
+++ b/libclc/opencl/include/clc/opencl/math/native_sqrt.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION
--- a/libclc/opencl/include/clc/opencl/math/native_tan.h
+++ b/libclc/opencl/include/clc/opencl/math/native_tan.h
@@ -12,5 +12,4 @@
 #include <clc/math/gentype.inc>
 #undef __FLOAT_ONLY
 #undef __CLC_FUNCTION