CUDA requires that static variables be visible to the host when offloading. However, the standard semantics of a static variable dictate that it should not be visible outside of the current file. In order to access it from the host we need to perform "externalization" on the static variable on the device. This requires generating a semi-unique name that can be affixed to the variable so as not to cause linker errors. This is currently done using the CUID functionality, an MD5 hash value set up by the clang driver. This allows us to achieve a mostly unique ID that is unique even between multiple compilations of the same file. However, this is not always available. Instead, this patch uses the unique ID from the file to generate a unique symbol name. This will create a unique name that is consistent between the host and device side compilations without requiring the CUID to be entered by the driver. The one downside to this is that we are no longer stable under multiple compilations of the same file. However, this is a very niche use-case and is not supported by Nvidia's CUDA compiler, so it is likely to be good enough. Reviewed By: tra Differential Revision: https://reviews.llvm.org/D125904
33 lines
1.3 KiB
Plaintext
33 lines
1.3 KiB
Plaintext
// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -emit-llvm -o - %s \
|
|
// RUN: | FileCheck -check-prefix=NORDC %s
|
|
// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -emit-llvm -o - %s \
|
|
// RUN: | FileCheck -check-prefix=NORDC-NEG %s
|
|
// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -fgpu-rdc -emit-llvm -o - %s \
|
|
// RUN: | FileCheck -check-prefix=RDC %s
|
|
// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -fgpu-rdc -emit-llvm -o - %s \
|
|
// RUN: | FileCheck -check-prefix=RDC-NEG %s
|
|
|
|
#include "Inputs/cuda.h"
|
|
|
|
// Device-only function template with an empty body. It is explicitly
// instantiated for int further down so that the linkage of the emitted
// definition can be checked.
template <typename T> __device__ void func() {}
|
|
// Kernel (__global__) template with an empty body. It is explicitly
// instantiated for int further down so that the linkage of the emitted
// definition can be checked.
template <typename T> __global__ void kernel() {}
|
|
|
|
// Explicit instantiation of func<int>. The checks that follow expect the
// definition to have internal linkage in the runs without -fgpu-rdc and
// weak_odr linkage in the runs with -fgpu-rdc.
template __device__ void func<int>();
|
|
// NORDC: define internal void @_Z4funcIiEvv()
|
|
// RDC: define weak_odr void @_Z4funcIiEvv()
|
|
|
|
// Explicit instantiation of kernel<int>. The checks that follow expect a
// plain `define void` (no linkage keyword) in the runs without -fgpu-rdc and
// weak_odr linkage in the runs with -fgpu-rdc.
template __global__ void kernel<int>();
|
|
// NORDC: define void @_Z6kernelIiEvv()
|
|
// RDC: define weak_odr void @_Z6kernelIiEvv()
|
|
|
|
// Ensure that unused static device function is eliminated
|
|
// Deliberately left unused in this test. The negative checks that follow
// expect that no definition of this function appears in the emitted IR,
// both with and without -fgpu-rdc.
static __device__ void static_func() {}
|
|
// NORDC-NEG-NOT: define{{.*}} void @_ZL13static_funcv()
|
|
// RDC-NEG-NOT: define{{.*}} void @_ZL13static_funcv[[FILEID:.*]]()
|
|
|
|
// Ensure that kernel function has external or weak_odr
|
|
// linkage regardless of the static specifier
|
|
// File-static kernel with an empty body. The checks that follow expect it to
// be emitted as a plain `define void` without -fgpu-rdc, and as weak_odr
// with -fgpu-rdc, where the mangled name may carry an extra suffix (matched
// by the FILEID pattern).
static __global__ void static_kernel() {}
|
|
// NORDC: define void @_ZL13static_kernelv()
|
|
// RDC: define weak_odr void @_ZL13static_kernelv[[FILEID:.*]]()
|