Files
clang-p2996/clang/test/CodeGenCUDA/offloading-entries.cu
Joseph Huber f1e917d07b [Offload] Unify offloading entries into a single section (#125731)
Summary:
This patch unifies the existing offloading entires into a single section
called `llvm_offload_entires`. This lets us use a more unified
offloading infrastructure so that all targets share the same handling.
The effect is that people in the runtimes now need to check if the kind
is what they expect, but the expectation is that you can combine
multiple potential providers into a compile job. Doesn't fully work
yet because of other runtime issues, but some day. Mostly this helps the
future of liboffload where we want to handle different languages than
OpenMP.
2025-02-06 08:24:01 -06:00

157 lines
12 KiB
Plaintext

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*" "managed.*"
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \
// RUN: --check-prefix=CUDA %s
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \
// RUN: --check-prefix=HIP %s
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-windows-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \
// RUN: --check-prefix=CUDA-COFF %s
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-windows-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \
// RUN: --check-prefix=HIP-COFF %s
#include "Inputs/cuda.h"
#define __managed__ __attribute__((managed))
//.
// CUDA: @managed = global i32 undef, align 4
// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading"
// CUDA: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "llvm_offload_entries"
// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading"
// CUDA: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "llvm_offload_entries"
// CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading"
// CUDA: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "llvm_offload_entries"
// CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading"
// CUDA: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @managed, ptr @.offloading.entry_name.3, i64 4, i64 0, ptr null }, section "llvm_offload_entries"
// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading"
// CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "llvm_offload_entries"
// CUDA: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "llvm_offload_entries"
//.
// HIP: @managed.managed = global i32 0, align 4
// HIP: @managed = externally_initialized global ptr null
// HIP: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading"
// HIP: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "llvm_offload_entries"
// HIP: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading"
// HIP: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "llvm_offload_entries"
// HIP: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading"
// HIP: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "llvm_offload_entries"
// HIP: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading"
// HIP: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 1, ptr @managed.managed, ptr @.offloading.entry_name.3, i64 4, i64 4, ptr @managed }, section "llvm_offload_entries"
// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading"
// HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "llvm_offload_entries"
// HIP: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "llvm_offload_entries"
//.
// CUDA-COFF: @managed = dso_local global i32 undef, align 4
// CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading"
// CUDA-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "llvm_offload_entries$OE"
// CUDA-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading"
// CUDA-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "llvm_offload_entries$OE"
// CUDA-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading"
// CUDA-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "llvm_offload_entries$OE"
// CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading"
// CUDA-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @managed, ptr @.offloading.entry_name.3, i64 4, i64 0, ptr null }, section "llvm_offload_entries$OE"
// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading"
// CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "llvm_offload_entries$OE"
// CUDA-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "llvm_offload_entries$OE"
//.
// HIP-COFF: @managed.managed = dso_local global i32 0, align 4
// HIP-COFF: @managed = dso_local externally_initialized global ptr null
// HIP-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading"
// HIP-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "llvm_offload_entries$OE"
// HIP-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading"
// HIP-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "llvm_offload_entries$OE"
// HIP-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading"
// HIP-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "llvm_offload_entries$OE"
// HIP-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading"
// HIP-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 1, ptr @managed.managed, ptr @.offloading.entry_name.3, i64 4, i64 4, ptr @managed }, section "llvm_offload_entries$OE"
// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading"
// HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "llvm_offload_entries$OE"
// HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading"
// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "llvm_offload_entries$OE"
//.
// CUDA-LABEL: @_Z18__device_stub__foov(
// CUDA-NEXT: entry:
// CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov)
// CUDA-NEXT: br label [[SETUP_END:%.*]]
// CUDA: setup.end:
// CUDA-NEXT: ret void
//
// HIP-LABEL: @_Z18__device_stub__foov(
// HIP-NEXT: entry:
// HIP-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3foov)
// HIP-NEXT: br label [[SETUP_END:%.*]]
// HIP: setup.end:
// HIP-NEXT: ret void
//
// CUDA-COFF-LABEL: @_Z18__device_stub__foov(
// CUDA-COFF-NEXT: entry:
// CUDA-COFF-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov)
// CUDA-COFF-NEXT: br label [[SETUP_END:%.*]]
// CUDA-COFF: setup.end:
// CUDA-COFF-NEXT: ret void
//
// HIP-COFF-LABEL: @_Z18__device_stub__foov(
// HIP-COFF-NEXT: entry:
// HIP-COFF-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3foov)
// HIP-COFF-NEXT: br label [[SETUP_END:%.*]]
// HIP-COFF: setup.end:
// HIP-COFF-NEXT: ret void
//
__global__ void foo() {}
__device__ int var = 1;
const __device__ int constant = 1;
extern __device__ int external;
__device__ __managed__ int managed = 0;
// CUDA-LABEL: @_Z21__device_stub__kernelv(
// CUDA-NEXT: entry:
// CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z21__device_stub__kernelv)
// CUDA-NEXT: br label [[SETUP_END:%.*]]
// CUDA: setup.end:
// CUDA-NEXT: ret void
//
// HIP-LABEL: @_Z21__device_stub__kernelv(
// HIP-NEXT: entry:
// HIP-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z6kernelv)
// HIP-NEXT: br label [[SETUP_END:%.*]]
// HIP: setup.end:
// HIP-NEXT: ret void
//
// CUDA-COFF-LABEL: @_Z21__device_stub__kernelv(
// CUDA-COFF-NEXT: entry:
// CUDA-COFF-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z21__device_stub__kernelv)
// CUDA-COFF-NEXT: br label [[SETUP_END:%.*]]
// CUDA-COFF: setup.end:
// CUDA-COFF-NEXT: ret void
//
// HIP-COFF-LABEL: @_Z21__device_stub__kernelv(
// HIP-COFF-NEXT: entry:
// HIP-COFF-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z6kernelv)
// HIP-COFF-NEXT: br label [[SETUP_END:%.*]]
// HIP-COFF: setup.end:
// HIP-COFF-NEXT: ret void
//
__global__ void kernel() { external = 1; }
struct surfaceReference { int desc; };
template <typename T, int dim = 1>
struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {};
surface<void> surf;
struct textureReference {
int desc;
};
template <typename T, int dim = 1, int mode = 0>
struct __attribute__((device_builtin_texture_type)) texture : public textureReference {};
texture<void> tex;