[libclc] Optimize CLC vector relational builtins (#124537)

Clang knows how to perform relational operations on OpenCL vectors, so
we don't need to use the Clang builtins. The builtins we were using
didn't support vector types, so we were previously scalarizing.

This commit generates the same LLVM fcmp operations as before, just
without the scalarization.
This commit is contained in:
Fraser Cormack
2025-01-27 13:25:37 +00:00
committed by GitHub
parent ef54e0bbfb
commit 347fb208c1
8 changed files with 90 additions and 130 deletions

View File

@@ -142,4 +142,30 @@
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, \
ARG1_TYPE)
// Emits one overload of FUNCTION taking (ARG1_TYPE, ARG2_TYPE) and returning
// RET_TYPE. The body returns _CLC_RELATIONAL_OP(x, y), a macro this header
// does not define: the including file must define it before expanding this.
#define _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY_IMPL(RET_TYPE, FUNCTION,          \
                                                  ARG1_TYPE, ARG2_TYPE)        \
  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) {         \
    return _CLC_RELATIONAL_OP(x, y);                                           \
  }

// Defines FUNCTION for the scalar argument types and for the 2-, 3-, 4-, 8-
// and 16-element vector forms of those types.
//
//   RET_TYPE      return type of the scalar overload
//   RET_TYPE_VEC  element type whose name, suffixed with the vector width,
//                 gives the return type of each vector overload
//   FUNCTION      name of the overloaded function to define
//   ARG1_TYPE     scalar type of the first operand (suffixed per width)
//   ARG2_TYPE     scalar type of the second operand (suffixed per width)
//
// The width suffix is pasted here, in the outer macro, so the type names are
// formed from the arguments exactly as written at the call site.
#define _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(RET_TYPE, RET_TYPE_VEC, FUNCTION, \
                                             ARG1_TYPE, ARG2_TYPE)             \
  _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY_IMPL(RET_TYPE, FUNCTION, ARG1_TYPE,     \
                                            ARG2_TYPE)                         \
  _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY_IMPL(RET_TYPE_VEC##2, FUNCTION,         \
                                            ARG1_TYPE##2, ARG2_TYPE##2)        \
  _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY_IMPL(RET_TYPE_VEC##3, FUNCTION,         \
                                            ARG1_TYPE##3, ARG2_TYPE##3)        \
  _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY_IMPL(RET_TYPE_VEC##4, FUNCTION,         \
                                            ARG1_TYPE##4, ARG2_TYPE##4)        \
  _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY_IMPL(RET_TYPE_VEC##8, FUNCTION,         \
                                            ARG1_TYPE##8, ARG2_TYPE##8)        \
  _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY_IMPL(RET_TYPE_VEC##16, FUNCTION,        \
                                            ARG1_TYPE##16, ARG2_TYPE##16)
#endif // __CLC_RELATIONAL_RELATIONAL_H__

View File

@@ -1,44 +1,28 @@
#include <clc/internal/clc.h>
#include <clc/relational/relational.h>
#define _CLC_DEFINE_ISEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
return (x == y); \
}
// Comparison expression returned by every overload that
// _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY generates in this file: X equal to Y.
// The whole expansion is parenthesized so it stays a single operand no matter
// which operators surround the macro call.
#define _CLC_RELATIONAL_OP(X, Y) ((X) == (Y))
_CLC_DEFINE_ISEQUAL(int, __clc_isequal, float, float)
_CLC_DEFINE_ISEQUAL(int2, __clc_isequal, float2, float2)
_CLC_DEFINE_ISEQUAL(int3, __clc_isequal, float3, float3)
_CLC_DEFINE_ISEQUAL(int4, __clc_isequal, float4, float4)
_CLC_DEFINE_ISEQUAL(int8, __clc_isequal, float8, float8)
_CLC_DEFINE_ISEQUAL(int16, __clc_isequal, float16, float16)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isequal, float, float)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
// The scalar version of __clc_isequal(double) returns an int, but the vector
// versions return long.
_CLC_DEFINE_ISEQUAL(int, __clc_isequal, double, double)
_CLC_DEFINE_ISEQUAL(long2, __clc_isequal, double2, double2)
_CLC_DEFINE_ISEQUAL(long3, __clc_isequal, double3, double3)
_CLC_DEFINE_ISEQUAL(long4, __clc_isequal, double4, double4)
_CLC_DEFINE_ISEQUAL(long8, __clc_isequal, double8, double8)
_CLC_DEFINE_ISEQUAL(long16, __clc_isequal, double16, double16)
// The scalar version of __clc_isequal(double, double) returns an int, but the
// vector versions return long.
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isequal, double, double)
#endif
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// The scalar version of __clc_isequal(half) returns an int, but the vector
// versions return short.
_CLC_DEFINE_ISEQUAL(int, __clc_isequal, half, half)
_CLC_DEFINE_ISEQUAL(short2, __clc_isequal, half2, half2)
_CLC_DEFINE_ISEQUAL(short3, __clc_isequal, half3, half3)
_CLC_DEFINE_ISEQUAL(short4, __clc_isequal, half4, half4)
_CLC_DEFINE_ISEQUAL(short8, __clc_isequal, half8, half8)
_CLC_DEFINE_ISEQUAL(short16, __clc_isequal, half16, half16)
// The scalar version of __clc_isequal(half, half) returns an int, but the
// vector versions return short.
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isequal, half, half)
#endif
#undef _CLC_DEFINE_ISEQUAL
#undef _CLC_RELATIONAL_OP

View File

@@ -1,12 +1,9 @@
#include <clc/internal/clc.h>
#include <clc/relational/relational.h>
// Note: It would be nice to use __builtin_isgreater with vector inputs, but it
// seems to only take scalar values as input, which will produce incorrect
// output for vector input types.
// Comparison expression returned by every overload that
// _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY generates in this file: X greater than
// Y. The whole expansion is parenthesized so it stays a single operand no
// matter which operators surround the macro call.
#define _CLC_RELATIONAL_OP(X, Y) ((X) > (Y))
_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isgreater, __builtin_isgreater, float,
float)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isgreater, float, float)
#ifdef cl_khr_fp64
@@ -14,12 +11,7 @@ _CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isgreater, __builtin_isgreater, float,
// The scalar version of __clc_isgreater(double, double) returns an int, but the
// vector versions return long.
_CLC_DEF _CLC_OVERLOAD int __clc_isgreater(double x, double y) {
return __builtin_isgreater(x, y);
}
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isgreater, double, double)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isgreater, double, double)
#endif
@@ -29,11 +21,8 @@ _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isgreater, double, double)
// The scalar version of __clc_isgreater(half, half) returns an int, but the
// vector versions return short.
_CLC_DEF _CLC_OVERLOAD int __clc_isgreater(half x, half y) {
return __builtin_isgreater(x, y);
}
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isgreater, half, half)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isgreater, half, half)
#endif
#undef _CLC_RELATIONAL_OP

View File

@@ -1,12 +1,10 @@
#include <clc/internal/clc.h>
#include <clc/relational/relational.h>
// Note: It would be nice to use __builtin_isgreaterequal with vector inputs,
// but it seems to only take scalar values as input, which will produce
// incorrect output for vector input types.
// Comparison expression returned by every overload that
// _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY generates in this file: X greater than
// or equal to Y. The whole expansion is parenthesized so it stays a single
// operand no matter which operators surround the macro call.
#define _CLC_RELATIONAL_OP(X, Y) ((X) >= (Y))
_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isgreaterequal,
__builtin_isgreaterequal, float, float)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isgreaterequal, float,
float)
#ifdef cl_khr_fp64
@@ -14,26 +12,20 @@ _CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isgreaterequal,
// The scalar version of __clc_isgreaterequal(double, double) returns an int,
// but the vector versions return long.
_CLC_DEF _CLC_OVERLOAD int __clc_isgreaterequal(double x, double y) {
return __builtin_isgreaterequal(x, y);
}
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isgreaterequal, double,
double)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isgreaterequal, double,
double)
#endif
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// The scalar version of __clc_isgreaterequal(half, half) returns an int, but
// The scalar version of __clc_isgreaterequal(half, half) returns an int, but
// the vector versions return short.
_CLC_DEF _CLC_OVERLOAD int __clc_isgreaterequal(half x, half y) {
return __builtin_isgreaterequal(x, y);
}
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isgreaterequal, half, half)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isgreaterequal, half,
half)
#endif
#undef _CLC_RELATIONAL_OP

View File

@@ -1,37 +1,28 @@
#include <clc/internal/clc.h>
#include <clc/relational/relational.h>
// Note: It would be nice to use __builtin_isless with vector inputs, but it
// seems to only take scalar values as input, which will produce incorrect
// output for vector input types.
// Comparison expression returned by every overload that
// _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY generates in this file: X less than Y.
// The whole expansion is parenthesized so it stays a single operand no matter
// which operators surround the macro call.
#define _CLC_RELATIONAL_OP(X, Y) ((X) < (Y))
_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isless, __builtin_isless, float, float)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isless, float, float)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
// The scalar version of __clc_isless(double, double) returns an int, but the
// vector versions return long.
_CLC_DEF _CLC_OVERLOAD int __clc_isless(double x, double y) {
return __builtin_isless(x, y);
}
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isless, double, double)
// The scalar version of __clc_isless(double, double) returns an int, but
// the vector versions return long.
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isless, double, double)
#endif
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// The scalar version of __clc_isless(half, half) returns an int, but the vector
// versions return short.
_CLC_DEF _CLC_OVERLOAD int __clc_isless(half x, half y) {
return __builtin_isless(x, y);
}
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isless, half, half)
// The scalar version of __clc_isless(half, half) returns an int, but the
// vector versions return short.
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isless, half, half)
#endif
#undef _CLC_RELATIONAL_OP

View File

@@ -1,12 +1,9 @@
#include <clc/internal/clc.h>
#include <clc/relational/relational.h>
// Note: It would be nice to use __builtin_islessequal with vector inputs, but
// it seems to only take scalar values as input, which will produce incorrect
// output for vector input types.
// Comparison expression returned by every overload that
// _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY generates in this file: X less than or
// equal to Y. The whole expansion is parenthesized so it stays a single
// operand no matter which operators surround the macro call.
#define _CLC_RELATIONAL_OP(X, Y) ((X) <= (Y))
_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_islessequal, __builtin_islessequal,
float, float)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_islessequal, float, float)
#ifdef cl_khr_fp64
@@ -14,12 +11,8 @@ _CLC_DEFINE_RELATIONAL_BINARY(int, __clc_islessequal, __builtin_islessequal,
// The scalar version of __clc_islessequal(double, double) returns an int, but
// the vector versions return long.
_CLC_DEF _CLC_OVERLOAD int __clc_islessequal(double x, double y) {
return __builtin_islessequal(x, y);
}
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_islessequal, double, double)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_islessequal, double,
double)
#endif
@@ -29,11 +22,8 @@ _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_islessequal, double, double)
// The scalar version of __clc_islessequal(half, half) returns an int, but the
// vector versions return short.
_CLC_DEF _CLC_OVERLOAD int __clc_islessequal(half x, half y) {
return __builtin_islessequal(x, y);
}
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_islessequal, half, half)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_islessequal, half, half)
#endif
#undef _CLC_RELATIONAL_OP

View File

@@ -1,12 +1,10 @@
#include <clc/internal/clc.h>
#include <clc/relational/relational.h>
// Note: It would be nice to use __builtin_islessgreater with vector inputs, but
// it seems to only take scalar values as input, which will produce incorrect
// output for vector input types.
// Comparison expression returned by every overload that
// _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY generates in this file: X strictly less
// than Y, or X strictly greater than Y. The top-level operator is ||, which
// binds very loosely, so the whole expansion is parenthesized to keep it a
// single operand no matter which operators surround the macro call.
#define _CLC_RELATIONAL_OP(X, Y) (((X) < (Y)) || ((X) > (Y)))
_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_islessgreater, __builtin_islessgreater,
float, float)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_islessgreater, float,
float)
#ifdef cl_khr_fp64
@@ -14,25 +12,20 @@ _CLC_DEFINE_RELATIONAL_BINARY(int, __clc_islessgreater, __builtin_islessgreater,
// The scalar version of __clc_islessgreater(double, double) returns an int, but
// the vector versions return long.
_CLC_DEF _CLC_OVERLOAD int __clc_islessgreater(double x, double y) {
return __builtin_islessgreater(x, y);
}
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_islessgreater, double, double)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_islessgreater, double,
double)
#endif
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// The scalar version of __clc_islessgreater(half, half) returns an int, but the
// vector versions return short.
_CLC_DEF _CLC_OVERLOAD int __clc_islessgreater(half x, half y) {
return __builtin_islessgreater(x, y);
}
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_islessgreater, half, half)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_islessgreater, half,
half)
#endif
#undef _CLC_RELATIONAL_OP

View File

@@ -1,33 +1,28 @@
#include <clc/internal/clc.h>
#include <clc/relational/relational.h>
#define _CLC_DEFINE_ISNOTEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
return (x != y); \
}
// Comparison expression returned by every overload that
// _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY generates in this file: X not equal to
// Y. The whole expansion is parenthesized so it stays a single operand no
// matter which operators surround the macro call.
#define _CLC_RELATIONAL_OP(X, Y) ((X) != (Y))
_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, float, float)
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, __clc_isnotequal, float, float)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isnotequal, float, float)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
// The scalar version of __clc_isnotequal(double, double) returns an int, but
// the vector versions return long.
_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, double, double)
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isnotequal, double, double)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isnotequal, double, double)
#endif
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// The scalar version of __clc_isnotequal(half, half) returns an int, but the
// vector versions return short.
_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, half, half)
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isnotequal, half, half)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isnotequal, half, half)
#endif
#undef _CLC_DEFINE_ISNOTEQUAL
#undef _CLC_RELATIONAL_OP