Files
clang-p2996/clang/test/CodeGenCUDA/correctly-rounded-div.cu
Matt Arsenault bac2a07540 clang: Attach !fpmath metadata to __builtin_sqrt based on language flags
OpenCL and HIP have -cl-fp32-correctly-rounded-divide-sqrt and
-fno-hip-fp32-correctly-rounded-divide-sqrt. The corresponding fpmath metadata
was only set on fdiv, and not sqrt. The backend is currently underutilizing
sqrt lowering options, and the responsibility is split between the libraries
and backend and this metadata is needed.

CUDA/NVCC has -prec-div and -prec-sqrt but clang doesn't appear to be
aiming for compatibility with those. Don't know if OpenMP has a similar
control.
2023-07-14 18:46:18 -04:00

50 lines
1.4 KiB
Plaintext

// RUN: %clang_cc1 %s -emit-llvm -o - -triple amdgcn-amd-amdhsa \
// RUN: -target-cpu gfx906 -fcuda-is-device -x hip \
// RUN: | FileCheck --check-prefixes=COMMON,CRDIV %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple amdgcn-amd-amdhsa \
// RUN: -target-cpu gfx906 -fcuda-is-device -x hip \
// RUN: -fno-hip-fp32-correctly-rounded-divide-sqrt \
// RUN: | FileCheck --check-prefixes=COMMON,NCRDIV %s
#include "Inputs/cuda.h"
// Four-lane float vector built with Clang's ext_vector_type attribute; it is
// the operand and result type of the vector division test (spvectordiv).
typedef __attribute__(( ext_vector_type(4) )) float float4;
// Scalar float division. Both invocations must emit an fdiv; only the
// invocation that passes -fno-hip-fp32-correctly-rounded-divide-sqrt may
// attach !fpmath metadata to it. The metadata node id is captured as MD so
// later checks can refer to the same node.
// COMMON-LABEL: @_Z11spscalardiv
// COMMON: fdiv{{.*}},
// NCRDIV: !fpmath ![[MD:[0-9]+]]
// CRDIV-NOT: !fpmath
__device__ float spscalardiv(float a, float b) {
  // The division is the only operation under test in this function.
  const float quotient = a / b;
  return quotient;
}
// Four-lane float division. The fdiv is expected in both invocations; when
// correctly rounded divide/sqrt is switched off it must reference the same
// !fpmath node (MD) that the scalar float division captured.
// COMMON-LABEL: @_Z11spvectordiv
// COMMON: fdiv{{.*}},
// NCRDIV: !fpmath ![[MD]]
// CRDIV-NOT: !fpmath
__device__ float4 spvectordiv(float4 a, float4 b) {
  // Element-wise vector division; the only operation under test here.
  const float4 quotient = a / b;
  return quotient;
}
// Double-precision division must never carry !fpmath metadata, in either
// invocation of this test.
// COMMON-LABEL: @_Z11dpscalardiv
// COMMON-NOT: !fpmath
__device__ double dpscalardiv(double a, double b) {
  // Double operands, so no accuracy metadata is expected on this division.
  const double quotient = a / b;
  return quotient;
}
// Single-precision square root. When correctly rounded divide/sqrt is
// switched off, the llvm.sqrt.f32 call must carry !fpmath metadata; otherwise
// the call must end its line with nothing attached. The sqrt node is captured
// under its own name (MD_SQRT) so that it does not overwrite MD, which the
// float division checks earlier in this file bound to the fdiv accuracy node.
// COMMON-LABEL: @_Z12spscalarsqrt
// NCRDIV: call contract float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD_SQRT:[0-9]+]]
// CRDIV: call contract float @llvm.sqrt.f32(float %{{.+}}){{$}}
__device__ float spscalarsqrt(float a) {
  return __builtin_sqrtf(a);
}

// Double-precision square root is never annotated, in either invocation.
// COMMON-LABEL: @_Z12dpscalarsqrt
// COMMON: call contract double @llvm.sqrt.f64(double %{{.+}}){{$}}
// COMMON-NOT: !fpmath
__device__ double dpscalarsqrt(double a) {
  return __builtin_sqrt(a);
}

// Values of the accuracy nodes captured above. These directives use a check
// prefix that the second invocation actually enables, so they are really
// verified; -DAG keeps them independent of the order in which the module
// prints its metadata nodes.
// NOTE(review): 3.0 is the ULP bound clang is expected to attach to f32 sqrt
// (fdiv uses 2.5) - confirm against CodeGenFunction::SetSqrtFPAccuracy.
// NCRDIV-DAG: ![[MD]] = !{float 2.500000e+00}
// NCRDIV-DAG: ![[MD_SQRT]] = !{float 3.000000e+00}