[libclc] Add v3 variants of async_work_group_copy/async_work_group_strided_copy/prefetch (#137932)

The OpenCL specification supports 3-component vector types for these built-ins.
This commit is contained in:
Wenju He
2025-04-30 12:19:08 +00:00
committed by GitHub
parent 101fd87f98
commit 5b6fc61091
7 changed files with 43 additions and 251 deletions

View File

@@ -9,15 +9,21 @@
// Expands <clc/async/async_work_group_copy.inc> through the gentype.inc
// headers, first with destination = local / source = global, then with the
// two address spaces swapped.
// NOTE(review): this listing looks like a rendered diff whose +/- markers
// were stripped, so the <clc/async/gentype.inc> include appears next to the
// <clc/integer/gentype.inc> and <clc/math/gentype.inc> includes. Confirm
// which lines are actually present in the header.

// Pass 1: destination address space is local, source is global.
#define __CLC_DST_ADDR_SPACE local
#define __CLC_SRC_ADDR_SPACE global
#define __CLC_BODY <clc/async/async_work_group_copy.inc>
#include <clc/async/gentype.inc>
#include <clc/integer/gentype.inc>
#undef __CLC_BODY
// __CLC_BODY is defined again before the second gentype.inc include,
// presumably because each gentype.inc leaves it undefined -- TODO confirm.
#define __CLC_BODY <clc/async/async_work_group_copy.inc>
#include <clc/math/gentype.inc>
#undef __CLC_BODY
#undef __CLC_DST_ADDR_SPACE
#undef __CLC_SRC_ADDR_SPACE
#undef __CLC_BODY
// Pass 2: destination address space is global, source is local.
#define __CLC_DST_ADDR_SPACE global
#define __CLC_SRC_ADDR_SPACE local
#define __CLC_BODY <clc/async/async_work_group_copy.inc>
#include <clc/async/gentype.inc>
#include <clc/integer/gentype.inc>
#undef __CLC_BODY
#define __CLC_BODY <clc/async/async_work_group_copy.inc>
#include <clc/math/gentype.inc>
#undef __CLC_BODY
#undef __CLC_DST_ADDR_SPACE
#undef __CLC_SRC_ADDR_SPACE
#undef __CLC_BODY

View File

@@ -9,15 +9,21 @@
// Expands <clc/async/async_work_group_strided_copy.inc> through the
// gentype.inc headers, first with destination = local / source = global,
// then with the two address spaces swapped.
// NOTE(review): this listing looks like a rendered diff whose +/- markers
// were stripped, so the <clc/async/gentype.inc> include appears next to the
// <clc/integer/gentype.inc> and <clc/math/gentype.inc> includes. Confirm
// which lines are actually present in the header.

// Pass 1: destination address space is local, source is global.
#define __CLC_DST_ADDR_SPACE local
#define __CLC_SRC_ADDR_SPACE global
#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
#include <clc/async/gentype.inc>
#include <clc/integer/gentype.inc>
#undef __CLC_BODY
// __CLC_BODY is defined again before the second gentype.inc include,
// presumably because each gentype.inc leaves it undefined -- TODO confirm.
#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
#include <clc/math/gentype.inc>
#undef __CLC_BODY
#undef __CLC_DST_ADDR_SPACE
#undef __CLC_SRC_ADDR_SPACE
#undef __CLC_BODY
// Pass 2: destination address space is global, source is local.
#define __CLC_DST_ADDR_SPACE global
#define __CLC_SRC_ADDR_SPACE local
#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
#include <clc/async/gentype.inc>
#include <clc/integer/gentype.inc>
#undef __CLC_BODY
#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
#include <clc/math/gentype.inc>
#undef __CLC_BODY
#undef __CLC_DST_ADDR_SPACE
#undef __CLC_SRC_ADDR_SPACE
#undef __CLC_BODY

View File

@@ -1,239 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Includes __CLC_BODY once per type listed below, with __CLC_GENTYPE defined
// as that type for the duration of the include and undefined again afterwards.
// The includer must define __CLC_BODY before including this file.
//
// Types covered in this section: char, uchar, short, ushort, int, uint,
// float, long and ulong, each as a scalar and as 2-, 4-, 8- and 16-component
// vectors. No 3-component vector types are listed.
#define __CLC_GENTYPE char
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE char2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE char4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE char8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE char16
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uchar
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uchar2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uchar4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uchar8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uchar16
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE short
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE short2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE short4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE short8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE short16
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ushort
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ushort2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ushort4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ushort8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ushort16
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE int
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE int2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE int4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE int8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE int16
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uint
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uint2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uint4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uint8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uint16
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float16
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE long
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE long2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE long4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE long8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE long16
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ulong
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ulong2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ulong4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ulong8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE ulong16
#include __CLC_BODY
#undef __CLC_GENTYPE
// double and its 2-, 4-, 8- and 16-component vectors: included only when the
// cl_khr_fp64 macro is defined. The extension is enabled by pragma before the
// first use of the type. No 3-component vector is listed.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#define __CLC_GENTYPE double
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE double2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE double4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE double8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE double16
#include __CLC_BODY
#undef __CLC_GENTYPE
#endif
// half and its 2-, 4-, 8- and 16-component vectors: included only when the
// cl_khr_fp16 macro is defined. The extension is enabled by pragma before the
// first use of the type. No 3-component vector is listed.
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16: enable
#define __CLC_GENTYPE half
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE half2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE half4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE half8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE half16
#include __CLC_BODY
#undef __CLC_GENTYPE
#endif
// __CLC_BODY is left undefined on exit, so an includer has to define it again
// before this header (or another one that expands __CLC_BODY) can be reused.
#undef __CLC_BODY

View File

@@ -7,5 +7,9 @@
//===----------------------------------------------------------------------===//
// Expands <clc/async/prefetch.inc> through the gentype.inc headers.
// NOTE(review): this listing looks like a rendered diff whose +/- markers
// were stripped, so the <clc/async/gentype.inc> include appears next to the
// <clc/integer/gentype.inc> and <clc/math/gentype.inc> includes. Confirm
// which lines are actually present in the header.
#define __CLC_BODY <clc/async/prefetch.inc>
#include <clc/async/gentype.inc>
#include <clc/integer/gentype.inc>
#undef __CLC_BODY
// __CLC_BODY is defined again before the second gentype.inc include,
// presumably because each gentype.inc leaves it undefined -- TODO confirm.
#define __CLC_BODY <clc/async/prefetch.inc>
#include <clc/math/gentype.inc>
#undef __CLC_BODY

View File

@@ -9,4 +9,9 @@
#include <clc/clc.h>
// Expands <async_work_group_copy.inc> through the gentype.inc headers.
// NOTE(review): this listing looks like a rendered diff whose +/- markers
// were stripped, so the <clc/async/gentype.inc> include appears next to the
// <clc/integer/gentype.inc> and <clc/math/gentype.inc> includes. Confirm
// which lines are actually present in the source file.
#define __CLC_BODY <async_work_group_copy.inc>
#include <clc/async/gentype.inc>
#include <clc/integer/gentype.inc>
#undef __CLC_BODY
// __CLC_BODY is defined again before the second gentype.inc include,
// presumably because each gentype.inc leaves it undefined -- TODO confirm.
#define __CLC_BODY <async_work_group_copy.inc>
#include <clc/math/gentype.inc>
#undef __CLC_BODY

View File

@@ -9,4 +9,9 @@
#include <clc/clc.h>
// Expands <async_work_group_strided_copy.inc> through the gentype.inc headers.
// NOTE(review): this listing looks like a rendered diff whose +/- markers
// were stripped, so the <clc/async/gentype.inc> include appears next to the
// <clc/integer/gentype.inc> and <clc/math/gentype.inc> includes. Confirm
// which lines are actually present in the source file.
#define __CLC_BODY <async_work_group_strided_copy.inc>
#include <clc/async/gentype.inc>
#include <clc/integer/gentype.inc>
#undef __CLC_BODY
// __CLC_BODY is defined again before the second gentype.inc include,
// presumably because each gentype.inc leaves it undefined -- TODO confirm.
#define __CLC_BODY <async_work_group_strided_copy.inc>
#include <clc/math/gentype.inc>
#undef __CLC_BODY

View File

@@ -9,4 +9,9 @@
#include <clc/clc.h>
// Expands <prefetch.inc> through the gentype.inc headers.
// NOTE(review): this listing looks like a rendered diff whose +/- markers
// were stripped, so the <clc/async/gentype.inc> include appears next to the
// <clc/integer/gentype.inc> and <clc/math/gentype.inc> includes. Confirm
// which lines are actually present in the source file.
#define __CLC_BODY <prefetch.inc>
#include <clc/async/gentype.inc>
#include <clc/integer/gentype.inc>
#undef __CLC_BODY
// __CLC_BODY is defined again before the second gentype.inc include,
// presumably because each gentype.inc leaves it undefined -- TODO confirm.
#define __CLC_BODY <prefetch.inc>
#include <clc/math/gentype.inc>
#undef __CLC_BODY