[NFC][libclc] Simplify clc_dot and dot implementation (#142922)
llvm-diff shows no change to amdgcn--amdhsa.bc
This commit is contained in:
@@ -7,59 +7,7 @@
|
|||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#include <clc/internal/clc.h>
|
#include <clc/internal/clc.h>
|
||||||
|
#include <clc/math/clc_fma.h>
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF float __clc_dot(float p0, float p1) { return p0 * p1; }
|
#define __CLC_BODY <clc_dot.inc>
|
||||||
|
#include <clc/math/gentype.inc>
|
||||||
_CLC_OVERLOAD _CLC_DEF float __clc_dot(float2 p0, float2 p1) {
|
|
||||||
return p0.x * p1.x + p0.y * p1.y;
|
|
||||||
}
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF float __clc_dot(float3 p0, float3 p1) {
|
|
||||||
return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
|
|
||||||
}
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF float __clc_dot(float4 p0, float4 p1) {
|
|
||||||
return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef cl_khr_fp64
|
|
||||||
|
|
||||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF double __clc_dot(double p0, double p1) {
|
|
||||||
return p0 * p1;
|
|
||||||
}
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF double __clc_dot(double2 p0, double2 p1) {
|
|
||||||
return p0.x * p1.x + p0.y * p1.y;
|
|
||||||
}
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF double __clc_dot(double3 p0, double3 p1) {
|
|
||||||
return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
|
|
||||||
}
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF double __clc_dot(double4 p0, double4 p1) {
|
|
||||||
return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef cl_khr_fp16
|
|
||||||
|
|
||||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF half __clc_dot(half p0, half p1) { return p0 * p1; }
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF half __clc_dot(half2 p0, half2 p1) {
|
|
||||||
return p0.x * p1.x + p0.y * p1.y;
|
|
||||||
}
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF half __clc_dot(half3 p0, half3 p1) {
|
|
||||||
return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
|
|
||||||
}
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF half __clc_dot(half4 p0, half4 p1) {
|
|
||||||
return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|||||||
25
libclc/clc/lib/generic/geometric/clc_dot.inc
Normal file
25
libclc/clc/lib/generic/geometric/clc_dot.inc
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#if (__CLC_VECSIZE_OR_1 == 1 || __CLC_VECSIZE_OR_1 == 2 || \
|
||||||
|
__CLC_VECSIZE_OR_1 == 3 || __CLC_VECSIZE_OR_1 == 4)
|
||||||
|
|
||||||
|
_CLC_OVERLOAD _CLC_DEF __CLC_SCALAR_GENTYPE __clc_dot(__CLC_GENTYPE x,
|
||||||
|
__CLC_GENTYPE y) {
|
||||||
|
#if __CLC_VECSIZE_OR_1 == 1
|
||||||
|
return x * y;
|
||||||
|
#elif __CLC_VECSIZE_OR_1 == 2
|
||||||
|
return x.s0 * y.s0 + x.s1 * y.s1;
|
||||||
|
#elif __CLC_VECSIZE_OR_1 == 3
|
||||||
|
return x.s0 * y.s0 + x.s1 * y.s1 + x.s2 * y.s2;
|
||||||
|
#else
|
||||||
|
return x.s0 * y.s0 + x.s1 * y.s1 + x.s2 * y.s2 + x.s3 * y.s3;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
@@ -9,60 +9,6 @@
|
|||||||
#include <clc/geometric/clc_dot.h>
|
#include <clc/geometric/clc_dot.h>
|
||||||
#include <clc/opencl/clc.h>
|
#include <clc/opencl/clc.h>
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF float dot(float p0, float p1) {
|
#define FUNCTION dot
|
||||||
return __clc_dot(p0, p1);
|
#define __CLC_BODY <clc/geometric/binary_def.inc>
|
||||||
}
|
#include <clc/math/gentype.inc>
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF float dot(float2 p0, float2 p1) {
|
|
||||||
return __clc_dot(p0, p1);
|
|
||||||
}
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF float dot(float3 p0, float3 p1) {
|
|
||||||
return __clc_dot(p0, p1);
|
|
||||||
}
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) {
|
|
||||||
return __clc_dot(p0, p1);
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef cl_khr_fp64
|
|
||||||
|
|
||||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF double dot(double p0, double p1) {
|
|
||||||
return __clc_dot(p0, p1);
|
|
||||||
}
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF double dot(double2 p0, double2 p1) {
|
|
||||||
return __clc_dot(p0, p1);
|
|
||||||
}
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF double dot(double3 p0, double3 p1) {
|
|
||||||
return __clc_dot(p0, p1);
|
|
||||||
}
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) {
|
|
||||||
return __clc_dot(p0, p1);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef cl_khr_fp16
|
|
||||||
|
|
||||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) { return __clc_dot(p0, p1); }
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF half dot(half2 p0, half2 p1) {
|
|
||||||
return __clc_dot(p0, p1);
|
|
||||||
}
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF half dot(half3 p0, half3 p1) {
|
|
||||||
return __clc_dot(p0, p1);
|
|
||||||
}
|
|
||||||
|
|
||||||
_CLC_OVERLOAD _CLC_DEF half dot(half4 p0, half4 p1) {
|
|
||||||
return __clc_dot(p0, p1);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|||||||
Reference in New Issue
Block a user