[libclc] Support the generic address space (#137183)

This commit provides definitions of builtins with the generic address
space.

One concept to consider is the difference between supporting the generic
address space from the user's perspective and the requirement for libclc
as a compiler implementation detail to define separate generic address
space builtins. In practice a target (like NVPTX) might notionally
support the generic address space, but it's mapped to the same LLVM
target address space as another address space (often the private one).

In such cases libclc must be careful not to define both private and
generic overloads of the same builtin. We therefore track these two
concepts separately, and make the assumption that if the generic address
space does clash with another, it's with the private one. Tracking them
separately is also necessary because some builtins, such as atomics, are
defined for the generic address space but not for the private address
space.
This commit is contained in:
Fraser Cormack
2025-05-21 17:50:00 +01:00
committed by GitHub
parent d997b4f531
commit 94142d9bb0
21 changed files with 177 additions and 4 deletions

View File

@@ -420,12 +420,40 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
-D${CLC_TARGET_DEFINE}
# All libclc builtin libraries see CLC headers
-I${CMAKE_CURRENT_SOURCE_DIR}/clc/include
# Error on undefined macros
-Werror=undef
)
if( NOT "${cpu}" STREQUAL "" )
list( APPEND build_flags -mcpu=${cpu} )
endif()
# Generic address space support.
#
# Note: when declaring builtins, we must consider that even if a target
# formally/nominally supports the generic address space, in practice that
# target may map it to the same target address space as another address
# space (often the private one). In such cases we must be careful not to
# multiply-define a builtin in a single target address space, as it would
# result in a mangling clash.
#
# For this reason we must consider the target support of the generic
# address space separately from the *implementation* decision about whether
# to declare certain builtins in that address space.
#
# Note: we assume that if there is no distinct generic address space, it
# maps to the private address space. Both values therefore start out as 0
# and are only overridden for the architectures listed below.
set( private_addrspace_val 0 )
set( generic_addrspace_val 0 )
# Both STREQUAL operands are quoted so that neither side can be implicitly
# dereferenced as a variable name (see CMake policy CMP0054).
if( "${ARCH}" STREQUAL "amdgcn" OR "${ARCH}" STREQUAL "r600"
    OR "${ARCH}" STREQUAL "amdgcn-amdhsa" )
  set( private_addrspace_val 5 )
endif()
if( "${ARCH}" STREQUAL "spirv" OR "${ARCH}" STREQUAL "spirv64"
    OR "${ARCH}" STREQUAL "clspv" OR "${ARCH}" STREQUAL "clspv64" )
  set( generic_addrspace_val 4 )
endif()
# Pass both values to the compiler as preprocessor definitions.
list( APPEND build_flags
  -D__CLC_PRIVATE_ADDRSPACE_VAL=${private_addrspace_val}
  -D__CLC_GENERIC_ADDRSPACE_VAL=${generic_addrspace_val}
)
set( clc_build_flags ${build_flags} -DCLC_INTERNAL )
add_libclc_builtin_set(

View File

@@ -23,4 +23,18 @@
#define _CLC_DEF __attribute__((always_inline))
#endif
// _CLC_GENERIC_AS_SUPPORTED is 1 when compiling as OpenCL C 2.0, or as
// OpenCL C 3.0 or later with the __opencl_c_generic_address_space feature
// macro defined; otherwise it is 0.
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
defined(__opencl_c_generic_address_space))
#define _CLC_GENERIC_AS_SUPPORTED 1
#else
#define _CLC_GENERIC_AS_SUPPORTED 0
#endif

// _CLC_DISTINCT_GENERIC_AS_SUPPORTED is 1 only when the generic address space
// is supported *and* its target address space value differs from the private
// one; otherwise it is 0.
#if _CLC_GENERIC_AS_SUPPORTED && \
__CLC_PRIVATE_ADDRSPACE_VAL != __CLC_GENERIC_ADDRSPACE_VAL
#define _CLC_DISTINCT_GENERIC_AS_SUPPORTED 1
#else
#define _CLC_DISTINCT_GENERIC_AS_SUPPORTED 0
#endif
#endif // __CLC_CLCFUNC_H_

View File

@@ -17,3 +17,8 @@ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
__CLC_GENTYPE y,
local __CLC_INTN *q);
// Generic address space overload. It is declared whenever
// _CLC_GENERIC_AS_SUPPORTED is non-zero, regardless of whether the generic
// address space is distinct from the private one.
#if _CLC_GENERIC_AS_SUPPORTED
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
__CLC_GENTYPE y,
generic __CLC_INTN *q);
#endif

View File

@@ -12,3 +12,7 @@ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
local __CLC_INTN *iptr);
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
private __CLC_INTN *iptr);
// Generic address space overload; declared whenever
// _CLC_GENERIC_AS_SUPPORTED is non-zero.
#if _CLC_GENERIC_AS_SUPPORTED
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
generic __CLC_INTN *iptr);
#endif

View File

@@ -12,3 +12,8 @@ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
local __CLC_GENTYPE *ptr);
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
__CLC_FUNCTION(__CLC_GENTYPE x, private __CLC_GENTYPE *ptr);
// Generic address space overload; declared whenever
// _CLC_GENERIC_AS_SUPPORTED is non-zero.
#if _CLC_GENERIC_AS_SUPPORTED
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
__CLC_FUNCTION(__CLC_GENTYPE x, generic __CLC_GENTYPE *ptr);
#endif

View File

@@ -26,3 +26,10 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x,
local __CLC_INTN *iptr) {
return __CLC_FUNCTION(FUNCTION)(x, iptr);
}
// Generic address space overload; forwards to __CLC_FUNCTION(FUNCTION).
// Only defined when _CLC_DISTINCT_GENERIC_AS_SUPPORTED is non-zero, i.e. when
// the generic address space is supported and its target address space value
// differs from the private one.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x,
generic __CLC_INTN *iptr) {
return __CLC_FUNCTION(FUNCTION)(x, iptr);
}
#endif

View File

@@ -26,3 +26,10 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x,
local __CLC_GENTYPE *ptr) {
return __CLC_FUNCTION(FUNCTION)(x, ptr);
}
// Generic address space overload; forwards to __CLC_FUNCTION(FUNCTION).
// Only defined when _CLC_DISTINCT_GENERIC_AS_SUPPORTED is non-zero, i.e. when
// the generic address space is supported and its target address space value
// differs from the private one.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x,
generic __CLC_GENTYPE *ptr) {
return __CLC_FUNCTION(FUNCTION)(x, ptr);
}
#endif

View File

@@ -34,5 +34,8 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_fract(__CLC_GENTYPE x,
FRACT_DEF(local);
FRACT_DEF(global);
// Instantiate the generic address space variant only when the generic address
// space is distinct (_CLC_DISTINCT_GENERIC_AS_SUPPORTED is non-zero).
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
FRACT_DEF(generic);
#endif
#undef MIN_CONSTANT

View File

@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_convert.h>
#include <clc/clcfunc.h>
#include <clc/internal/clc.h>
#include <clc/math/math.h>
#include <clc/relational/clc_isinf.h>
@@ -28,3 +29,10 @@
#define __CLC_ADDRESS_SPACE local
#include <clc/math/gentype.inc>
#undef __CLC_ADDRESS_SPACE
// Instantiate the clc_frexp.inc body with __CLC_ADDRESS_SPACE set to generic,
// but only when the generic address space is distinct
// (_CLC_DISTINCT_GENERIC_AS_SUPPORTED is non-zero).
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
#define __CLC_BODY <clc_frexp.inc>
#define __CLC_ADDRESS_SPACE generic
#include <clc/math/gentype.inc>
#undef __CLC_ADDRESS_SPACE
#endif

View File

@@ -23,4 +23,8 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_modf(__CLC_GENTYPE x,
CLC_MODF_DEF(local);
CLC_MODF_DEF(global);
// Instantiate the generic address space variant only when the generic address
// space is distinct (_CLC_DISTINCT_GENERIC_AS_SUPPORTED is non-zero).
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
CLC_MODF_DEF(generic);
#endif
#undef CLC_MODF_DEF

View File

@@ -29,3 +29,9 @@
#define __CLC_ADDRESS_SPACE local
#include <clc_remquo.inc>
#undef __CLC_ADDRESS_SPACE
// Include clc_remquo.inc once more with __CLC_ADDRESS_SPACE set to generic,
// but only when the generic address space is distinct
// (_CLC_DISTINCT_GENERIC_AS_SUPPORTED is non-zero).
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
#define __CLC_ADDRESS_SPACE generic
#include <clc_remquo.inc>
#undef __CLC_ADDRESS_SPACE
#endif

View File

@@ -15,5 +15,8 @@
__CLC_DECLARE_SINCOS(global, __CLC_GENTYPE)
__CLC_DECLARE_SINCOS(local, __CLC_GENTYPE)
__CLC_DECLARE_SINCOS(private, __CLC_GENTYPE)
// Generic address space variant; only emitted when
// _CLC_DISTINCT_GENERIC_AS_SUPPORTED is non-zero.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
__CLC_DECLARE_SINCOS(generic, __CLC_GENTYPE)
#endif
#undef __CLC_DECLARE_SINCOS

View File

@@ -11,4 +11,11 @@
#define __CLC_BODY <clc/math/remquo_decl.inc>
#include <clc/math/gentype.inc>
// Include the remquo_decl.inc body again with __CLC_ADDRESS_SPACE set to
// generic. This is done whenever _CLC_GENERIC_AS_SUPPORTED is non-zero,
// regardless of whether the generic address space is distinct from the
// private one.
#if _CLC_GENERIC_AS_SUPPORTED
#define __CLC_BODY <clc/math/remquo_decl.inc>
#define __CLC_ADDRESS_SPACE generic
#include <clc/math/gentype.inc>
#undef __CLC_ADDRESS_SPACE
#endif
#undef __CLC_FUNCTION

View File

@@ -17,11 +17,19 @@
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
_CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
// _CLC_VECTOR_VLOAD_GENERIC_DECL is an alias of _CLC_VECTOR_VLOAD_DECL when the
// generic address space is supported, and otherwise a four-argument macro that
// expands to nothing.
#if _CLC_GENERIC_AS_SUPPORTED
#define _CLC_VECTOR_VLOAD_GENERIC_DECL _CLC_VECTOR_VLOAD_DECL
#else
// The generic address space isn't available, so make the macro do nothing
#define _CLC_VECTOR_VLOAD_GENERIC_DECL(X, Y, Z, W)
#endif
#define _CLC_VECTOR_VLOAD_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE) \
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global)
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \
_CLC_VECTOR_VLOAD_GENERIC_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __generic)
#define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
_CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE)
@@ -61,7 +69,13 @@ _CLC_VLOAD_DECL(a_half, half, float, , __global)
_CLC_VLOAD_DECL(a_half, half, float, , __local)
_CLC_VLOAD_DECL(a_half, half, float, , __private)
// Generic address space counterparts of the _half and a_half declarations
// for the other address spaces; only emitted when _CLC_GENERIC_AS_SUPPORTED is
// non-zero.
#if _CLC_GENERIC_AS_SUPPORTED
_CLC_VLOAD_DECL(_half, half, float, , __generic)
_CLC_VLOAD_DECL(a_half, half, float, , __generic)
#endif
#undef _CLC_VLOAD_DECL
#undef _CLC_VECTOR_VLOAD_DECL
#undef _CLC_VECTOR_VLOAD_PRIM3
#undef _CLC_VECTOR_VLOAD_PRIM1
#undef _CLC_VECTOR_VLOAD_GENERIC_DECL

View File

@@ -17,10 +17,20 @@
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE, RND) \
_CLC_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE, RND)
// Generic address space variants of the declaration macros: aliases of
// _CLC_VSTORE_DECL and _CLC_VECTOR_VSTORE_DECL when the generic address space
// is supported, and otherwise macros with the same number of parameters that
// expand to nothing.
#if _CLC_GENERIC_AS_SUPPORTED
#define _CLC_VSTORE_GENERIC_DECL _CLC_VSTORE_DECL
#define _CLC_VECTOR_VSTORE_GENERIC_DECL _CLC_VECTOR_VSTORE_DECL
#else
// The generic address space isn't available, so make the macros do nothing
#define _CLC_VSTORE_GENERIC_DECL(X, Y, Z, W, V, U)
#define _CLC_VECTOR_VSTORE_GENERIC_DECL(X, Y, Z, W, V)
#endif
#define _CLC_VECTOR_VSTORE_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE, RND) \
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private, RND) \
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local, RND) \
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global, RND)
_CLC_VECTOR_VSTORE_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global, RND) \
_CLC_VECTOR_VSTORE_GENERIC_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __generic, RND)
#define _CLC_VECTOR_VSTORE_PRIM1(PRIM_TYPE) \
_CLC_VECTOR_VSTORE_PRIM3(, PRIM_TYPE, PRIM_TYPE, )
@@ -29,10 +39,12 @@
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __private, RND) \
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __local, RND) \
_CLC_VSTORE_DECL(_half, half, PRIM_TYPE, , __global, RND) \
_CLC_VSTORE_GENERIC_DECL(_half, half, PRIM_TYPE, , __generic, RND) \
_CLC_VECTOR_VSTORE_PRIM3(_half, half, PRIM_TYPE, RND) \
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __private, RND) \
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __local, RND) \
_CLC_VSTORE_DECL(a_half, half, PRIM_TYPE, , __global, RND) \
_CLC_VSTORE_GENERIC_DECL(a_half, half, PRIM_TYPE, , __generic, RND) \
_CLC_VECTOR_VSTORE_PRIM3(a_half, half, PRIM_TYPE, RND)
_CLC_VECTOR_VSTORE_PRIM1(char)
@@ -65,6 +77,8 @@ _CLC_VECTOR_VSTORE_PRIM1(half)
#endif
#undef _CLC_VSTORE_DECL
#undef _CLC_VSTORE_GENERIC_DECL
#undef _CLC_VECTOR_VSTORE_DECL
#undef _CLC_VECTOR_VSTORE_PRIM3
#undef _CLC_VECTOR_VSTORE_PRIM1
#undef _CLC_VECTOR_VSTORE_GENERIC_DECL

View File

@@ -13,11 +13,17 @@
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __generic);
#endif
#undef __CLC_OFFSET
#else
FUNC(, 1, __CLC_GENTYPE, __private);
FUNC(, 1, __CLC_GENTYPE, __local);
FUNC(, 1, __CLC_GENTYPE, __global);
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
FUNC(, 1, __CLC_GENTYPE, __generic);
#endif
#endif
#endif

View File

@@ -20,3 +20,10 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE remquo(__CLC_GENTYPE x, __CLC_GENTYPE y,
local __CLC_INTN *q) {
return __clc_remquo(x, y, q);
}
// Generic address space overload of remquo; forwards to __clc_remquo.
// Only defined when _CLC_DISTINCT_GENERIC_AS_SUPPORTED is non-zero, i.e. when
// the generic address space is supported and its target address space value
// differs from the private one.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE remquo(__CLC_GENTYPE x, __CLC_GENTYPE y,
generic __CLC_INTN *q) {
return __clc_remquo(x, y, q);
}
#endif

View File

@@ -51,11 +51,19 @@
*)(&x[16 * offset])); \
}
// VLOAD_VECTORIZE_GENERIC is an alias of VLOAD_VECTORIZE when the generic
// address space is distinct, and otherwise a two-argument macro that expands
// to nothing.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
#define VLOAD_VECTORIZE_GENERIC VLOAD_VECTORIZE
#else
// There is no distinct generic address space (either it is unsupported, or it
// has the same target address space value as the private address space), so
// make the macro do nothing
#define VLOAD_VECTORIZE_GENERIC(X, Y)
#endif
#define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __private) \
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __local) \
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __constant) \
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __global)
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __global) \
VLOAD_VECTORIZE_GENERIC(__CLC_SCALAR_GENTYPE, __generic)
#define VLOAD_TYPES() \
VLOAD_ADDR_SPACES(char) \
@@ -129,3 +137,4 @@ VLOAD_ADDR_SPACES(half)
#undef VLOAD_TYPES
#undef VLOAD_ADDR_SPACES
#undef VLOAD_VECTORIZE
#undef VLOAD_VECTORIZE_GENERIC

View File

@@ -20,6 +20,9 @@ FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __private);
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __local);
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __global);
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __constant);
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __generic);
#endif
#undef __CLC_OFFSET
#else
@@ -27,5 +30,8 @@ FUNC(, 1, 1, __CLC_GENTYPE, __private);
FUNC(, 1, 1, __CLC_GENTYPE, __local);
FUNC(, 1, 1, __CLC_GENTYPE, __global);
FUNC(, 1, 1, __CLC_GENTYPE, __constant);
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
FUNC(, 1, 1, __CLC_GENTYPE, __generic);
#endif
#endif
#endif

View File

@@ -50,10 +50,18 @@
*)(&mem[16 * offset])) = vec; \
}
// VSTORE_VECTORIZE_GENERIC is an alias of VSTORE_VECTORIZE when the generic
// address space is distinct, and otherwise a two-argument macro that expands
// to nothing.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
#define VSTORE_VECTORIZE_GENERIC VSTORE_VECTORIZE
#else
// There is no distinct generic address space (either it is unsupported, or it
// has the same target address space value as the private address space), so
// make the macro do nothing
#define VSTORE_VECTORIZE_GENERIC(X, Y)
#endif
#define VSTORE_ADDR_SPACES(__CLC_SCALAR___CLC_GENTYPE) \
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __private) \
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __local) \
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __global)
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __global) \
VSTORE_VECTORIZE_GENERIC(__CLC_SCALAR___CLC_GENTYPE, __generic)
VSTORE_ADDR_SPACES(char)
VSTORE_ADDR_SPACES(uchar)
@@ -248,3 +256,4 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rte(double x) {
#undef DECLARE_HELPER
#undef VSTORE_ADDR_SPACES
#undef VSTORE_VECTORIZE
#undef VSTORE_VECTORIZE_GENERIC

View File

@@ -31,12 +31,19 @@ FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __local,
STORE_HALF_BUILTIN);
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __global,
STORE_HALF_BUILTIN);
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __generic,
STORE_HALF_BUILTIN);
#endif
#undef __CLC_OFFSET
#else
FUNC(, 1, 1, __CLC_GENTYPE, __private, STORE_HALF_BUILTIN);
FUNC(, 1, 1, __CLC_GENTYPE, __local, STORE_HALF_BUILTIN);
FUNC(, 1, 1, __CLC_GENTYPE, __global, STORE_HALF_BUILTIN);
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
FUNC(, 1, 1, __CLC_GENTYPE, __generic, STORE_HALF_BUILTIN);
#endif
#endif
#undef STORE_HALF_BUILTIN