[HIP] Allow std::malloc in device function
D106463 caused a regression that prevents std::malloc to be called in the device function, which is allowed with nvcc. Basically the standard C++ header introducing malloc in std namespace by using ::malloc. The device ::malloc function needs to be declared before using ::malloc to be introduced into std namespace. Revert D106463 and add a test. Reviewed by: Artem Belevich Differential Revision: https://reviews.llvm.org/D150965
This commit is contained in:
@@ -46,6 +46,67 @@ extern "C" {
|
||||
}
|
||||
#endif //__cplusplus
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#if __has_include("hip/hip_version.h")
|
||||
#include "hip/hip_version.h"
|
||||
#endif // __has_include("hip/hip_version.h")
|
||||
#endif // __HIPCC_RTC__
|
||||
|
||||
typedef __SIZE_TYPE__ __hip_size_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif //__cplusplus
|
||||
|
||||
#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
|
||||
__device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
|
||||
__device__ void __ockl_dm_dealloc(unsigned long long __addr);
|
||||
#if __has_feature(address_sanitizer)
|
||||
__device__ unsigned long long __asan_malloc_impl(unsigned long long __size,
|
||||
unsigned long long __pc);
|
||||
__device__ void __asan_free_impl(unsigned long long __addr,
|
||||
unsigned long long __pc);
|
||||
__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) {
|
||||
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
|
||||
return (void *)__asan_malloc_impl(__size, __pc);
|
||||
}
|
||||
__attribute__((noinline, weak)) __device__ void free(void *__ptr) {
|
||||
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
|
||||
__asan_free_impl((unsigned long long)__ptr, __pc);
|
||||
}
|
||||
#else // __has_feature(address_sanitizer)
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
return (void *) __ockl_dm_alloc(__size);
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__ockl_dm_dealloc((unsigned long long)__ptr);
|
||||
}
|
||||
#endif // __has_feature(address_sanitizer)
|
||||
#else // HIP version check
|
||||
#if __HIP_ENABLE_DEVICE_MALLOC__
|
||||
__device__ void *__hip_malloc(__hip_size_t __size);
|
||||
__device__ void *__hip_free(void *__ptr);
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
return __hip_malloc(__size);
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__hip_free(__ptr);
|
||||
}
|
||||
#else // __HIP_ENABLE_DEVICE_MALLOC__
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
__builtin_trap();
|
||||
return (void *)0;
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__builtin_trap();
|
||||
}
|
||||
#endif // __HIP_ENABLE_DEVICE_MALLOC__
|
||||
#endif // HIP version check
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif //__cplusplus
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
@@ -71,59 +132,6 @@ typedef __SIZE_TYPE__ size_t;
|
||||
#define INT_MAX __INTMAX_MAX__
|
||||
#endif // __HIPCC_RTC__
|
||||
|
||||
typedef __SIZE_TYPE__ __hip_size_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif //__cplusplus
|
||||
|
||||
#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
|
||||
extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
|
||||
extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr);
|
||||
#if __has_feature(address_sanitizer)
|
||||
extern "C" __device__ unsigned long long __asan_malloc_impl(unsigned long long __size, unsigned long long __pc);
|
||||
extern "C" __device__ void __asan_free_impl(unsigned long long __addr, unsigned long long __pc);
|
||||
__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) {
|
||||
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
|
||||
return (void *)__asan_malloc_impl(__size, __pc);
|
||||
}
|
||||
__attribute__((noinline, weak)) __device__ void free(void *__ptr) {
|
||||
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
|
||||
__asan_free_impl((unsigned long long)__ptr, __pc);
|
||||
}
|
||||
#else
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
return (void *) __ockl_dm_alloc(__size);
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__ockl_dm_dealloc((unsigned long long)__ptr);
|
||||
}
|
||||
#endif // __has_feature(address_sanitizer)
|
||||
#else // HIP version check
|
||||
#if __HIP_ENABLE_DEVICE_MALLOC__
|
||||
__device__ void *__hip_malloc(__hip_size_t __size);
|
||||
__device__ void *__hip_free(void *__ptr);
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
return __hip_malloc(__size);
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__hip_free(__ptr);
|
||||
}
|
||||
#else
|
||||
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
|
||||
__builtin_trap();
|
||||
return (void *)0;
|
||||
}
|
||||
__attribute__((weak)) inline __device__ void free(void *__ptr) {
|
||||
__builtin_trap();
|
||||
}
|
||||
#endif
|
||||
#endif // HIP version check
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif //__cplusplus
|
||||
|
||||
#include <__clang_hip_libdevice_declares.h>
|
||||
#include <__clang_hip_math.h>
|
||||
#include <__clang_hip_stdlib.h>
|
||||
|
||||
@@ -26,5 +26,7 @@ float fabs(float __x) { return __builtin_fabs(__x); }
|
||||
float abs(float __x) { return fabs(__x); }
|
||||
double abs(double __x) { return fabs(__x); }
|
||||
|
||||
using ::malloc;
|
||||
using ::free;
|
||||
}
|
||||
|
||||
|
||||
@@ -105,8 +105,6 @@ long lrint(double __a);
|
||||
long lrintf(float __a);
|
||||
long lround(double __a);
|
||||
long lroundf(float __a);
|
||||
int max(int __a, int __b);
|
||||
int min(int __a, int __b);
|
||||
double modf(double __a, double *__b);
|
||||
float modff(float __a, float *__b);
|
||||
double nearbyint(double __a);
|
||||
|
||||
0
clang/test/Headers/Inputs/include/sstream
Normal file
0
clang/test/Headers/Inputs/include/sstream
Normal file
0
clang/test/Headers/Inputs/include/stdexcept
Normal file
0
clang/test/Headers/Inputs/include/stdexcept
Normal file
@@ -31,7 +31,14 @@
|
||||
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
|
||||
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
|
||||
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
|
||||
// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,MALLOC %s
|
||||
// RUN: -D__HIPCC_RTC__ -disable-llvm-passes | FileCheck -check-prefixes=MALLOC %s
|
||||
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
|
||||
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
|
||||
// RUN: -internal-isystem %S/Inputs/include \
|
||||
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
|
||||
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
|
||||
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
|
||||
// RUN: -disable-llvm-passes | FileCheck -check-prefixes=MALLOC %s
|
||||
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
|
||||
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
|
||||
// RUN: -internal-isystem %S/Inputs/include \
|
||||
@@ -40,6 +47,13 @@
|
||||
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
|
||||
// RUN: -fsanitize=address -disable-llvm-passes -D__HIPCC_RTC__ \
|
||||
// RUN: | FileCheck -check-prefixes=MALLOC-ASAN %s
|
||||
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
|
||||
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
|
||||
// RUN: -internal-isystem %S/Inputs/include \
|
||||
// RUN: -aux-triple amdgcn-amd-amdhsa -triple x86_64-unknown-unknown \
|
||||
// RUN: -emit-llvm %s -o - \
|
||||
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
|
||||
// RUN: -disable-llvm-passes | FileCheck -check-prefixes=MALLOC-HOST %s
|
||||
|
||||
// expected-no-diagnostics
|
||||
|
||||
@@ -133,9 +147,10 @@ __device__ double test_isnan() {
|
||||
|
||||
// Check that device malloc and free do not conflict with std headers.
|
||||
#include <cstdlib>
|
||||
// CHECK-LABEL: define{{.*}}@_Z11test_malloc
|
||||
// CHECK: call {{.*}}ptr @malloc(i64
|
||||
// CHECK-LABEL: define weak {{.*}}ptr @malloc(i64
|
||||
// MALLOC-LABEL: define{{.*}}@_Z11test_malloc
|
||||
// MALLOC: call {{.*}}ptr @malloc(i64
|
||||
// MALLOC: call {{.*}}ptr @malloc(i64
|
||||
// MALLOC-LABEL: define weak {{.*}}ptr @malloc(i64
|
||||
// MALLOC: call i64 @__ockl_dm_alloc
|
||||
// NOMALLOC: call void @llvm.trap
|
||||
// MALLOC-ASAN-LABEL: define weak {{.*}}ptr @malloc(i64
|
||||
@@ -143,11 +158,13 @@ __device__ double test_isnan() {
|
||||
// MALLOC-ASAN: call i64 @__asan_malloc_impl(i64 {{.*}}, i64 {{.*}})
|
||||
__device__ void test_malloc(void *a) {
|
||||
a = malloc(42);
|
||||
a = std::malloc(42);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define{{.*}}@_Z9test_free
|
||||
// CHECK: call {{.*}}void @free(ptr
|
||||
// CHECK-LABEL: define weak {{.*}}void @free(ptr
|
||||
// MALLOC-LABEL: define{{.*}}@_Z9test_free
|
||||
// MALLOC: call {{.*}}void @free(ptr
|
||||
// MALLOC: call {{.*}}void @free(ptr
|
||||
// MALLOC-LABEL: define weak {{.*}}void @free(ptr
|
||||
// MALLOC: call void @__ockl_dm_dealloc
|
||||
// NOMALLOC: call void @llvm.trap
|
||||
// MALLOC-ASAN-LABEL: define weak {{.*}}void @free(ptr
|
||||
@@ -155,4 +172,17 @@ __device__ void test_malloc(void *a) {
|
||||
// MALLOC-ASAN: call void @__asan_free_impl(i64 {{.*}}, i64 {{.*}})
|
||||
__device__ void test_free(void *a) {
|
||||
free(a);
|
||||
std::free(a);
|
||||
}
|
||||
|
||||
// MALLOC-HOST-LABEL: define{{.*}}@_Z16test_malloc_host
|
||||
// MALLOC-HOST: call {{.*}}ptr @_Z6mallocm(i64
|
||||
// MALLOC-HOST: call {{.*}}void @_Z4freePv(ptr
|
||||
// MALLOC-HOST: call {{.*}}ptr @_Z6mallocm(i64
|
||||
// MALLOC-HOST: call {{.*}}void @_Z4freePv(ptr
|
||||
void test_malloc_host(void *a) {
|
||||
a = malloc(42);
|
||||
free(a);
|
||||
a = std::malloc(42);
|
||||
std::free(a);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user