Files
clang-p2996/libclc/opencl/lib/generic/shared/vload.cl
Fraser Cormack 94142d9bb0 [libclc] Support the generic address space (#137183)
This commit provides definitions of builtins with the generic address
space.

One concept to consider is the difference between supporting the generic
address space from the user's perspective and the requirement for libclc
as a compiler implementation detail to define separate generic address
space builtins. In practice a target (like NVPTX) might notionally
support the generic address space, but it's mapped to the same LLVM
target address space as another address space (often the private one).

In such cases libclc must be careful not to define both private and
generic overloads of the same builtin. We track these two concepts
separately, and make the assumption that if the generic address space
does clash with another, it's with the private one. We track the
concepts separately because there are some builtins such as atomics that
are defined for the generic address space but not the private address
space.
2025-05-21 17:50:00 +01:00

141 lines
7.8 KiB
OpenCL C

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/clc.h>
// VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE)
//
// Generates the vload2/vload3/vload4/vload8/vload16 overloads for one scalar
// type in one address space.
//
// vloadN must read from a pointer that is only guaranteed to be aligned to
// the *scalar* element type, while OpenCL vector types carry the (larger)
// natural alignment of the whole vector. Each overload therefore dereferences
// through a "less_aligned_<AS><type><N>" typedef whose alignment is reduced
// to sizeof(PRIM_TYPE), so the load does not assume vector alignment.
//
// vload3 is special-cased: it reads a 2-element chunk plus the trailing
// scalar and reassembles the result, rather than loading through a 3-vector
// (whose storage footprint matches a 4-vector and would read out of bounds).
#define VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
typedef PRIM_TYPE##2 less_aligned_##ADDR_SPACE##PRIM_TYPE##2 \
__attribute__((aligned(sizeof(PRIM_TYPE)))); \
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##2 vload2(size_t offset, \
const ADDR_SPACE PRIM_TYPE *x) { \
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##2 \
*)(&x[2 * offset])); \
} \
\
typedef PRIM_TYPE##3 less_aligned_##ADDR_SPACE##PRIM_TYPE##3 \
__attribute__((aligned(sizeof(PRIM_TYPE)))); \
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##3 vload3(size_t offset, \
const ADDR_SPACE PRIM_TYPE *x) { \
PRIM_TYPE##2 vec = \
*((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##2 \
*)(&x[3 * offset])); \
return (PRIM_TYPE##3)(vec.s0, vec.s1, x[offset * 3 + 2]); \
} \
\
typedef PRIM_TYPE##4 less_aligned_##ADDR_SPACE##PRIM_TYPE##4 \
__attribute__((aligned(sizeof(PRIM_TYPE)))); \
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##4 vload4(size_t offset, \
const ADDR_SPACE PRIM_TYPE *x) { \
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##4 \
*)(&x[4 * offset])); \
} \
\
typedef PRIM_TYPE##8 less_aligned_##ADDR_SPACE##PRIM_TYPE##8 \
__attribute__((aligned(sizeof(PRIM_TYPE)))); \
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##8 vload8(size_t offset, \
const ADDR_SPACE PRIM_TYPE *x) { \
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##8 \
*)(&x[8 * offset])); \
} \
\
typedef PRIM_TYPE##16 less_aligned_##ADDR_SPACE##PRIM_TYPE##16 \
__attribute__((aligned(sizeof(PRIM_TYPE)))); \
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##16 vload16( \
size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##16 \
*)(&x[16 * offset])); \
}
// Whether the generic address space gets its own set of overloads. A target
// may support the generic address space from the user's perspective while
// mapping it to the same LLVM address space as another one (often __private);
// defining both overloads would then collide. _CLC_DISTINCT_GENERIC_AS_SUPPORTED
// is set only when generic is a genuinely distinct address space.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
#define VLOAD_VECTORIZE_GENERIC VLOAD_VECTORIZE
#else
// The generic address space isn't available, so make the macro do nothing
#define VLOAD_VECTORIZE_GENERIC(X, Y)
#endif
// Instantiate every vloadN overload of one scalar type across all address
// spaces. Generic is emitted only when it is distinct from the others; see
// VLOAD_VECTORIZE_GENERIC above.
#define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __private) \
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __local) \
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __constant) \
VLOAD_VECTORIZE(__CLC_SCALAR_GENTYPE, __global) \
VLOAD_VECTORIZE_GENERIC(__CLC_SCALAR_GENTYPE, __generic)
// Stamp out vloadN for every scalar gentype that requires no extension
// support; double and half are handled separately below.
#define VLOAD_TYPES() \
VLOAD_ADDR_SPACES(char) \
VLOAD_ADDR_SPACES(uchar) \
VLOAD_ADDR_SPACES(short) \
VLOAD_ADDR_SPACES(ushort) \
VLOAD_ADDR_SPACES(int) \
VLOAD_ADDR_SPACES(uint) \
VLOAD_ADDR_SPACES(long) \
VLOAD_ADDR_SPACES(ulong) \
VLOAD_ADDR_SPACES(float)
VLOAD_TYPES()
// double and half overloads are only defined when the corresponding extension
// is available; the pragma enables use of the type in the definitions below.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
VLOAD_ADDR_SPACES(double)
#endif
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
VLOAD_ADDR_SPACES(half)
#endif
/* vload_half are legal even without cl_khr_fp16 */
/* no vload_half for double */
// VEC_LOADn loads n half values starting at mem[offset], each converted via
// the __builtin_load_halff builtin (half -> float), advancing `offset` as a
// side effect (note the offset++ in VEC_LOAD1). Wider loads are composed
// recursively from their .lo/.hi (or .sN) components, so elements are read
// in increasing memory order.
// NOTE(review): the AS parameter is unused by VEC_LOAD1 here; presumably it
// is threaded through for symmetry with target-specific variants — confirm.
#define VEC_LOAD1(val, AS) val = __builtin_load_halff(&mem[offset++]);
#define VEC_LOAD2(val, AS) \
VEC_LOAD1(val.lo, AS) \
VEC_LOAD1(val.hi, AS)
#define VEC_LOAD3(val, AS) \
VEC_LOAD1(val.s0, AS) \
VEC_LOAD1(val.s1, AS) \
VEC_LOAD1(val.s2, AS)
#define VEC_LOAD4(val, AS) \
VEC_LOAD2(val.lo, AS) \
VEC_LOAD2(val.hi, AS)
#define VEC_LOAD8(val, AS) \
VEC_LOAD4(val.lo, AS) \
VEC_LOAD4(val.hi, AS)
#define VEC_LOAD16(val, AS) \
VEC_LOAD8(val.lo, AS) \
VEC_LOAD8(val.hi, AS)
// __FUNC defines one vload_half##SUFFIX / vloada_half##SUFFIX overload pair
// returning TYPE from an `AS half *` source. vload_half scales the element
// offset by VEC_SIZE while vloada_half scales by OFFSET_SIZE; the two sizes
// are supplied by vload_half.inc via gentype.inc (not visible here —
// presumably they differ only for the 3-element "array" variants — confirm).
// FUNC is the usual extra level of macro indirection so that the arguments
// are fully expanded before __FUNC pastes them into the function names.
#define __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) \
_CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, \
const AS half *mem) { \
offset *= VEC_SIZE; \
TYPE __tmp; \
VEC_LOAD##VEC_SIZE(__tmp, AS) return __tmp; \
} \
_CLC_OVERLOAD _CLC_DEF TYPE vloada_half##SUFFIX(size_t offset, \
const AS half *mem) { \
offset *= OFFSET_SIZE; \
TYPE __tmp; \
VEC_LOAD##VEC_SIZE(__tmp, AS) return __tmp; \
}
#define FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) \
__FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS)
// Stamp out all vload_half/vloada_half overloads: gentype.inc re-includes
// vload_half.inc once per gentype, and that file invokes FUNC above.
#define __CLC_BODY "vload_half.inc"
#include <clc/math/gentype.inc>
// Tidy up: these helper macros are implementation details of this file and
// must not leak past this point.
#undef FUNC
#undef __FUNC
#undef VEC_LOAD16
#undef VEC_LOAD8
#undef VEC_LOAD4
#undef VEC_LOAD3
#undef VEC_LOAD2
#undef VEC_LOAD1
#undef VLOAD_TYPES
#undef VLOAD_ADDR_SPACES
#undef VLOAD_VECTORIZE
#undef VLOAD_VECTORIZE_GENERIC