[libclc] Move cross to CLC library; add missing half overloads (#139713)

The half overloads are trivially identical to the float and double ones.

It didn't seem worth using 'gentype' for the OpenCL layer or the CLC
declarations, so those are just written out explicitly. Using 'gentype' does
help avoid the less trivial repetition in the CLC implementation, though.
This commit is contained in:
Fraser Cormack
2025-05-13 17:07:07 +01:00
committed by GitHub
parent eaa45dc622
commit 7a4af40896
5 changed files with 88 additions and 8 deletions

View File

@@ -0,0 +1,31 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef __CLC_GEOMETRIC_CLC_CROSS_H__
#define __CLC_GEOMETRIC_CLC_CROSS_H__
// Declarations of __clc_cross, the CLC-library cross product.
//
// Only 3- and 4-component vector overloads are declared. Each overload takes
// two vectors of the same type and returns a vector of that type.
//
// NOTE(review): _CLC_OVERLOAD and _CLC_DECL are not defined in this header;
// presumably the including file provides them first - confirm.

// Single-precision overloads; always declared.
_CLC_OVERLOAD _CLC_DECL float3 __clc_cross(float3 p0, float3 p1);
_CLC_OVERLOAD _CLC_DECL float4 __clc_cross(float4 p0, float4 p1);
// Double-precision overloads; declared only when cl_khr_fp64 is defined.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_OVERLOAD _CLC_DECL double3 __clc_cross(double3 p0, double3 p1);
_CLC_OVERLOAD _CLC_DECL double4 __clc_cross(double4 p0, double4 p1);
#endif
// Half-precision overloads; declared only when cl_khr_fp16 is defined.
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
_CLC_OVERLOAD _CLC_DECL half3 __clc_cross(half3 p0, half3 p1);
_CLC_OVERLOAD _CLC_DECL half4 __clc_cross(half4 p0, half4 p1);
#endif
#endif // __CLC_GEOMETRIC_CLC_CROSS_H__

View File

@@ -2,6 +2,7 @@ common/clc_degrees.cl
common/clc_radians.cl
common/clc_sign.cl
common/clc_smoothstep.cl
geometric/clc_cross.cl
geometric/clc_distance.cl
geometric/clc_dot.cl
geometric/clc_fast_distance.cl

View File

@@ -0,0 +1,12 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/internal/clc.h>
// __CLC_BODY names the type-generic body to instantiate. It must be defined
// before gentype.inc is included.
// NOTE(review): gentype.inc presumably includes __CLC_BODY once per
// floating-point gentype, with __CLC_GENTYPE and __CLC_VECSIZE_OR_1 set for
// each, which is what yields the individual __clc_cross overloads - confirm.
#define __CLC_BODY <clc_cross.inc>
#include <clc/math/gentype.inc>

View File

@@ -0,0 +1,25 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#if __CLC_VECSIZE_OR_1 == 3

// Cross product of two 3-component vectors, written with rotated swizzles.
// Lane by lane this is:
//   x = p0.y * p1.z - p0.z * p1.y
//   y = p0.z * p1.x - p0.x * p1.z
//   z = p0.x * p1.y - p0.y * p1.x
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cross(__CLC_GENTYPE p0,
                                                 __CLC_GENTYPE p1) {
  return p0.yzx * p1.zxy - p0.zxy * p1.yzx;
}

#elif __CLC_VECSIZE_OR_1 == 4

// 4-component variant: the xyz lanes hold the cross product of the inputs'
// xyz lanes (same per-lane formula as the 3-component case). The inputs' w
// lanes are never read, and the result's w lane is set to zero.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cross(__CLC_GENTYPE p0,
                                                 __CLC_GENTYPE p1) {
  return (__CLC_GENTYPE)(p0.yzx * p1.zxy - p0.zxy * p1.yzx, 0.0F);
}

#endif
// No overload is emitted for any other vector size.

View File

@@ -7,27 +7,38 @@
//===----------------------------------------------------------------------===//
#include <clc/clc.h>
#include <clc/geometric/clc_cross.h>
// OpenCL cross() for float3: returns the cross product of p0 and p1.
// The arithmetic lives in the CLC library's __clc_cross; this wrapper only
// forwards to it, like the half overloads in this file. The previous inline
// computation was left in front of this call, making the call unreachable.
_CLC_OVERLOAD _CLC_DEF float3 cross(float3 p0, float3 p1) {
  return __clc_cross(p0, p1);
}
// OpenCL cross() for float4: the xyz lanes of the result are the cross
// product of p0.xyz and p1.xyz, and the w lane is zero.
// The arithmetic lives in the CLC library's __clc_cross; this wrapper only
// forwards to it, like the half overloads in this file. The previous inline
// computation was left in front of this call, making the call unreachable.
_CLC_OVERLOAD _CLC_DEF float4 cross(float4 p0, float4 p1) {
  return __clc_cross(p0, p1);
}
#ifdef cl_khr_fp64

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// Double-precision cross() overloads, built only when cl_khr_fp64 is defined.
// Both forward to the CLC library's __clc_cross rather than repeating the
// arithmetic here; the old inline computations had been left in front of the
// forwarding calls, which made those calls unreachable.

// Cross product of two double3 vectors.
_CLC_OVERLOAD _CLC_DEF double3 cross(double3 p0, double3 p1) {
  return __clc_cross(p0, p1);
}

// Cross product of p0.xyz and p1.xyz; the w lane of the result is zero.
_CLC_OVERLOAD _CLC_DEF double4 cross(double4 p0, double4 p1) {
  return __clc_cross(p0, p1);
}

#endif
#if defined(cl_khr_fp16)

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// Half-precision cross() overloads, built only when cl_khr_fp16 is defined.
// Each one hands its arguments straight to the CLC library's __clc_cross.

// half3 overload.
_CLC_OVERLOAD _CLC_DEF half3 cross(half3 p0, half3 p1) {
  const half3 product = __clc_cross(p0, p1);
  return product;
}

// half4 overload.
_CLC_OVERLOAD _CLC_DEF half4 cross(half4 p0, half4 p1) {
  const half4 product = __clc_cross(p0, p1);
  return product;
}

#endif