Summary: This patch adds support for registering texture / surface variables from CUDA / HIP. Additionally, we now properly track the `extern` and `const` flags that are also used in these runtime functions. This does not implement `managed` variables yet, as those seem to require some extra handling I'm not familiar with. The issue there is that the current offload entry isn't large enough to carry size and alignment information along with an extra global.
140 lines
9.3 KiB
Plaintext
140 lines
9.3 KiB
Plaintext
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex ".omp_offloading.entry.*"
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \
// RUN: --check-prefix=CUDA %s
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \
// RUN: --check-prefix=HIP %s
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-windows-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \
// RUN: --check-prefix=CUDA-COFF %s
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-windows-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \
// RUN: --check-prefix=HIP-COFF %s
|
|
#include "Inputs/cuda.h"
|
|
|
|
//.
|
|
// CUDA: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
|
|
// CUDA: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
|
|
// CUDA: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00"
|
|
// CUDA: @.omp_offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
|
|
// CUDA: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00"
|
|
// CUDA: @.omp_offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
|
|
// CUDA: @.omp_offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00"
|
|
// CUDA: @.omp_offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.omp_offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries", align 1
|
|
// CUDA: @.omp_offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00"
|
|
// CUDA: @.omp_offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.omp_offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries", align 1
|
|
//.
|
|
// HIP: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
|
|
// HIP: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
|
|
// HIP: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00"
|
|
// HIP: @.omp_offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
|
|
// HIP: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00"
|
|
// HIP: @.omp_offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries", align 1
|
|
// HIP: @.omp_offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00"
|
|
// HIP: @.omp_offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.omp_offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries", align 1
|
|
// HIP: @.omp_offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00"
|
|
// HIP: @.omp_offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.omp_offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "hip_offloading_entries", align 1
|
|
//.
|
|
// CUDA-COFF: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
|
|
// CUDA-COFF: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
|
|
// CUDA-COFF: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00"
|
|
// CUDA-COFF: @.omp_offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
|
|
// CUDA-COFF: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00"
|
|
// CUDA-COFF: @.omp_offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
|
|
// CUDA-COFF: @.omp_offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00"
|
|
// CUDA-COFF: @.omp_offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.omp_offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries$OE", align 1
|
|
// CUDA-COFF: @.omp_offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00"
|
|
// CUDA-COFF: @.omp_offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.omp_offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries$OE", align 1
|
|
//.
|
|
// HIP-COFF: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
|
|
// HIP-COFF: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
|
|
// HIP-COFF: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00"
|
|
// HIP-COFF: @.omp_offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
|
|
// HIP-COFF: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00"
|
|
// HIP-COFF: @.omp_offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
|
|
// HIP-COFF: @.omp_offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00"
|
|
// HIP-COFF: @.omp_offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.omp_offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries$OE", align 1
|
|
// HIP-COFF: @.omp_offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00"
|
|
// HIP-COFF: @.omp_offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.omp_offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "hip_offloading_entries$OE", align 1
|
|
//.
|
|
// CUDA-LABEL: @_Z18__device_stub__foov(
|
|
// CUDA-NEXT: entry:
|
|
// CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov)
|
|
// CUDA-NEXT: br label [[SETUP_END:%.*]]
|
|
// CUDA: setup.end:
|
|
// CUDA-NEXT: ret void
|
|
//
|
|
// HIP-LABEL: @_Z18__device_stub__foov(
|
|
// HIP-NEXT: entry:
|
|
// HIP-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3foov)
|
|
// HIP-NEXT: br label [[SETUP_END:%.*]]
|
|
// HIP: setup.end:
|
|
// HIP-NEXT: ret void
|
|
//
|
|
// CUDA-COFF-LABEL: @_Z18__device_stub__foov(
|
|
// CUDA-COFF-NEXT: entry:
|
|
// CUDA-COFF-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov)
|
|
// CUDA-COFF-NEXT: br label [[SETUP_END:%.*]]
|
|
// CUDA-COFF: setup.end:
|
|
// CUDA-COFF-NEXT: ret void
|
|
//
|
|
// HIP-COFF-LABEL: @_Z18__device_stub__foov(
|
|
// HIP-COFF-NEXT: entry:
|
|
// HIP-COFF-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3foov)
|
|
// HIP-COFF-NEXT: br label [[SETUP_END:%.*]]
|
|
// HIP-COFF: setup.end:
|
|
// HIP-COFF-NEXT: ret void
|
|
//
|
|
// Empty kernel. The assertions above expect an offload entry named "_Z3foov"
// with size 0 and both i32 fields 0, plus a host stub that calls cudaLaunch
// (hipLaunchByPtr when the file is compiled as HIP).
__global__ void foo() {}
|
|
// Initialized device variable. The assertions above expect an offload entry
// "var" with size 4 and both i32 fields 0.
__device__ int var = 1;
|
|
// Const-qualified device variable. None of the entry assertions visible in
// this file name it.
const __device__ int constant = 1;
|
|
// Declaration only; no definition appears in the visible part of this file.
// kernel() below stores to it.
extern __device__ int external;
|
|
|
|
// CUDA-LABEL: @_Z21__device_stub__kernelv(
|
|
// CUDA-NEXT: entry:
|
|
// CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z21__device_stub__kernelv)
|
|
// CUDA-NEXT: br label [[SETUP_END:%.*]]
|
|
// CUDA: setup.end:
|
|
// CUDA-NEXT: ret void
|
|
//
|
|
// HIP-LABEL: @_Z21__device_stub__kernelv(
|
|
// HIP-NEXT: entry:
|
|
// HIP-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z6kernelv)
|
|
// HIP-NEXT: br label [[SETUP_END:%.*]]
|
|
// HIP: setup.end:
|
|
// HIP-NEXT: ret void
|
|
//
|
|
// CUDA-COFF-LABEL: @_Z21__device_stub__kernelv(
|
|
// CUDA-COFF-NEXT: entry:
|
|
// CUDA-COFF-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z21__device_stub__kernelv)
|
|
// CUDA-COFF-NEXT: br label [[SETUP_END:%.*]]
|
|
// CUDA-COFF: setup.end:
|
|
// CUDA-COFF-NEXT: ret void
|
|
//
|
|
// HIP-COFF-LABEL: @_Z21__device_stub__kernelv(
|
|
// HIP-COFF-NEXT: entry:
|
|
// HIP-COFF-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z6kernelv)
|
|
// HIP-COFF-NEXT: br label [[SETUP_END:%.*]]
|
|
// HIP-COFF: setup.end:
|
|
// HIP-COFF-NEXT: ret void
|
|
//
|
|
// Kernel that stores 1 to the extern device variable `external`. The
// assertions above expect an offload entry named "_Z6kernelv" and a host stub
// that calls cudaLaunch (hipLaunchByPtr when the file is compiled as HIP).
__global__ void kernel() { external = 1; }
|
|
|
|
// Base type inherited by the `surface` template below; it carries a single
// int field.
struct surfaceReference {
  int desc;
};
|
|
|
|
// Minimal stand-in for a surface type: an empty class template tagged with the
// device_builtin_surface_type attribute and derived from surfaceReference.
// Neither template parameter is used by the body; `dim` defaults to 1.
template <typename T, int dim = 1>
struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {};
|
|
|
|
// Namespace-scope surface object. The assertions above expect an offload entry
// "surf" with size 4 and i32 fields 2 and 1.
surface<void> surf;
|
|
|
|
// Base type inherited by the `texture` template below; it carries a single
// int field.
struct textureReference {
  int desc;
};
|
|
|
|
// Minimal stand-in for a texture type: an empty class template tagged with the
// device_builtin_texture_type attribute and derived from textureReference.
// No template parameter is used by the body; `dim` defaults to 1 and `mode`
// defaults to 0.
template <typename T, int dim = 1, int mode = 0>
struct __attribute__((device_builtin_texture_type)) texture : public textureReference {};
|
|
|
|
// Namespace-scope texture object. The assertions above expect an offload entry
// "tex" with size 4 and i32 fields 3 and 1.
texture<void> tex;
|