Adds a `check-offload-unit` target for running the liboffload unit test suite. The unit test binary runs the tests against every available device; it can optionally be filtered to the devices of a single platform, but the check target runs on everything. The target is not part of `check-offload` and does not get propagated to the top-level build. I'm not sure whether either of those is desirable, but I'm happy to look into it if we want. Also removes the `offload/unittests/Plugins` test, as it is dead code and doesn't build.
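For illustration, here is a minimal sketch of the kind of per-device test the suite runs. This is not the actual unit test code: the GTest usage, the test and helper names, and the public `olInit`/`olIterateDevices`/`olGetDeviceInfo`/`olMemAlloc`/`olMemFree` entry points (assumed to mirror the `*_impl` definitions in the file below) are illustrative assumptions.

// Hypothetical per-device test sketch (not the actual suite). Assumes a
// GTest-based binary and that the public olX entry points forward to the
// olX_impl definitions in OffloadImpl.cpp below.
#include <OffloadAPI.h>
#include <gtest/gtest.h>
#include <vector>

namespace {

// Assumed helper: the special host device has a platform whose backend is
// OL_PLATFORM_BACKEND_HOST, so skip it when exercising device allocations.
bool isHostDevice(ol_device_handle_t Device) {
  ol_platform_handle_t Platform = nullptr;
  olGetDeviceInfo(Device, OL_DEVICE_INFO_PLATFORM, sizeof(Platform),
                  &Platform);
  ol_platform_backend_t Backend = OL_PLATFORM_BACKEND_UNKNOWN;
  olGetPlatformInfo(Platform, OL_PLATFORM_INFO_BACKEND, sizeof(Backend),
                    &Backend);
  return Backend == OL_PLATFORM_BACKEND_HOST;
}

std::vector<ol_device_handle_t> allDevices() {
  std::vector<ol_device_handle_t> Devices;
  olInit();
  // Assumed callback shape: return true to keep iterating over devices.
  olIterateDevices(
      [](ol_device_handle_t Device, void *Data) {
        static_cast<std::vector<ol_device_handle_t> *>(Data)->push_back(
            Device);
        return true;
      },
      &Devices);
  return Devices;
}

TEST(OffloadUnitSketch, AllocFreeOnEveryDevice) {
  for (ol_device_handle_t Device : allDevices()) {
    if (isHostDevice(Device))
      continue;
    void *Ptr = nullptr;
    // Assumes the public wrappers return a status comparable to OL_SUCCESS.
    ASSERT_EQ(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, /*Size=*/1024, &Ptr),
              OL_SUCCESS);
    ASSERT_NE(Ptr, nullptr);
    ASSERT_EQ(olMemFree(Ptr), OL_SUCCESS);
  }
}

} // namespace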
//===- ol_impl.cpp - Implementation of the new LLVM/Offload API ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains the definitions of the new LLVM/Offload API entry points. See
// new-api/API/README.md for more information.
//
//===----------------------------------------------------------------------===//

#include "OffloadImpl.hpp"
#include "Helpers.hpp"
#include "PluginManager.h"
#include "llvm/Support/FormatVariadic.h"
#include <OffloadAPI.h>

#include <mutex>

// TODO: Some plugins expect to be linked into libomptarget which defines these
// symbols to implement ompt callbacks. The least invasive workaround here is to
// define them in libLLVMOffload as false/null so they are never used. In future
// it would be better to allow the plugins to implement callbacks without
// pulling in details from libomptarget.
#ifdef OMPT_SUPPORT
namespace llvm::omp::target {
namespace ompt {
bool Initialized = false;
ompt_get_callback_t lookupCallbackByCode = nullptr;
ompt_function_lookup_t lookupCallbackByName = nullptr;
} // namespace ompt
} // namespace llvm::omp::target
#endif

using namespace llvm::omp::target;
using namespace llvm::omp::target::plugin;

// Handle type definitions. Ideally these would be 1:1 with the plugins, but
// we add some additional data here for now to avoid churn in the plugin
// interface.
struct ol_device_impl_t {
  ol_device_impl_t(int DeviceNum, GenericDeviceTy *Device,
                   ol_platform_handle_t Platform)
      : DeviceNum(DeviceNum), Device(Device), Platform(Platform) {}
  int DeviceNum;
  GenericDeviceTy *Device;
  ol_platform_handle_t Platform;
};

struct ol_platform_impl_t {
  ol_platform_impl_t(std::unique_ptr<GenericPluginTy> Plugin,
                     std::vector<ol_device_impl_t> Devices,
                     ol_platform_backend_t BackendType)
      : Plugin(std::move(Plugin)), Devices(Devices), BackendType(BackendType) {}
  std::unique_ptr<GenericPluginTy> Plugin;
  std::vector<ol_device_impl_t> Devices;
  ol_platform_backend_t BackendType;
};

struct ol_queue_impl_t {
  ol_queue_impl_t(__tgt_async_info *AsyncInfo, ol_device_handle_t Device)
      : AsyncInfo(AsyncInfo), Device(Device) {}
  __tgt_async_info *AsyncInfo;
  ol_device_handle_t Device;
};

struct ol_event_impl_t {
  ol_event_impl_t(void *EventInfo, ol_queue_handle_t Queue)
      : EventInfo(EventInfo), Queue(Queue) {}
  ~ol_event_impl_t() { (void)Queue->Device->Device->destroyEvent(EventInfo); }
  void *EventInfo;
  ol_queue_handle_t Queue;
};

struct ol_program_impl_t {
  ol_program_impl_t(plugin::DeviceImageTy *Image,
                    std::unique_ptr<llvm::MemoryBuffer> ImageData,
                    const __tgt_device_image &DeviceImage)
      : Image(Image), ImageData(std::move(ImageData)),
        DeviceImage(DeviceImage) {}
  plugin::DeviceImageTy *Image;
  std::unique_ptr<llvm::MemoryBuffer> ImageData;
  __tgt_device_image DeviceImage;
};

namespace llvm {
namespace offload {

struct AllocInfo {
  ol_device_handle_t Device;
  ol_alloc_type_t Type;
};

using AllocInfoMapT = DenseMap<void *, AllocInfo>;
AllocInfoMapT &allocInfoMap() {
  static AllocInfoMapT AllocInfoMap{};
  return AllocInfoMap;
}

using PlatformVecT = SmallVector<ol_platform_impl_t, 4>;
PlatformVecT &Platforms() {
  static PlatformVecT Platforms;
  return Platforms;
}

ol_device_handle_t HostDevice() {
  // The host platform is always inserted last
  return &Platforms().back().Devices[0];
}

template <typename HandleT> ol_impl_result_t olDestroy(HandleT Handle) {
  delete Handle;
  return OL_SUCCESS;
}

constexpr ol_platform_backend_t pluginNameToBackend(StringRef Name) {
  if (Name == "amdgpu") {
    return OL_PLATFORM_BACKEND_AMDGPU;
  } else if (Name == "cuda") {
    return OL_PLATFORM_BACKEND_CUDA;
  } else {
    return OL_PLATFORM_BACKEND_UNKNOWN;
  }
}

// Every plugin exports this method to create an instance of the plugin type.
#define PLUGIN_TARGET(Name) extern "C" GenericPluginTy *createPlugin_##Name();
#include "Shared/Targets.def"

void initPlugins() {
  // Attempt to create an instance of each supported plugin.
#define PLUGIN_TARGET(Name)                                                    \
  do {                                                                         \
    Platforms().emplace_back(ol_platform_impl_t{                               \
        std::unique_ptr<GenericPluginTy>(createPlugin_##Name()),               \
        {},                                                                    \
        pluginNameToBackend(#Name)});                                          \
  } while (false);
#include "Shared/Targets.def"

  // Preemptively initialize all devices in the plugin
  for (auto &Platform : Platforms()) {
    // Do not use the host plugin - it isn't supported.
    if (Platform.BackendType == OL_PLATFORM_BACKEND_UNKNOWN)
      continue;
    auto Err = Platform.Plugin->init();
    [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
    for (auto DevNum = 0; DevNum < Platform.Plugin->number_of_devices();
         DevNum++) {
      if (Platform.Plugin->init_device(DevNum) == OFFLOAD_SUCCESS) {
        Platform.Devices.emplace_back(ol_device_impl_t{
            DevNum, &Platform.Plugin->getDevice(DevNum), &Platform});
      }
    }
  }

  // Add the special host device
  auto &HostPlatform = Platforms().emplace_back(
      ol_platform_impl_t{nullptr,
                         {ol_device_impl_t{-1, nullptr, nullptr}},
                         OL_PLATFORM_BACKEND_HOST});
  HostDevice()->Platform = &HostPlatform;

  offloadConfig().TracingEnabled = std::getenv("OFFLOAD_TRACE");
  offloadConfig().ValidationEnabled =
      !std::getenv("OFFLOAD_DISABLE_VALIDATION");
}

// TODO: We can properly reference count here and manage the resources in a more
// clever way
ol_impl_result_t olInit_impl() {
  static std::once_flag InitFlag;
  std::call_once(InitFlag, initPlugins);

  return OL_SUCCESS;
}
ol_impl_result_t olShutDown_impl() { return OL_SUCCESS; }

ol_impl_result_t olGetPlatformInfoImplDetail(ol_platform_handle_t Platform,
                                             ol_platform_info_t PropName,
                                             size_t PropSize, void *PropValue,
                                             size_t *PropSizeRet) {
  ReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet);
  bool IsHost = Platform->BackendType == OL_PLATFORM_BACKEND_HOST;

  switch (PropName) {
  case OL_PLATFORM_INFO_NAME:
    return ReturnValue(IsHost ? "Host" : Platform->Plugin->getName());
  case OL_PLATFORM_INFO_VENDOR_NAME:
    // TODO: Implement this
    return ReturnValue("Unknown platform vendor");
  case OL_PLATFORM_INFO_VERSION: {
    return ReturnValue(formatv("v{0}.{1}.{2}", OL_VERSION_MAJOR,
                               OL_VERSION_MINOR, OL_VERSION_PATCH)
                           .str()
                           .c_str());
  }
  case OL_PLATFORM_INFO_BACKEND: {
    return ReturnValue(Platform->BackendType);
  }
  default:
    return OL_ERRC_INVALID_ENUMERATION;
  }

  return OL_SUCCESS;
}

ol_impl_result_t olGetPlatformInfo_impl(ol_platform_handle_t Platform,
                                        ol_platform_info_t PropName,
                                        size_t PropSize, void *PropValue) {
  return olGetPlatformInfoImplDetail(Platform, PropName, PropSize, PropValue,
                                     nullptr);
}

ol_impl_result_t olGetPlatformInfoSize_impl(ol_platform_handle_t Platform,
                                            ol_platform_info_t PropName,
                                            size_t *PropSizeRet) {
  return olGetPlatformInfoImplDetail(Platform, PropName, 0, nullptr,
                                     PropSizeRet);
}

ol_impl_result_t olGetDeviceInfoImplDetail(ol_device_handle_t Device,
                                           ol_device_info_t PropName,
                                           size_t PropSize, void *PropValue,
                                           size_t *PropSizeRet) {

  ReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet);

  // Find the info if it exists under any of the given names
  auto GetInfo = [&](std::vector<std::string> Names) {
    InfoQueueTy DevInfo;
    if (auto Err = Device->Device->obtainInfoImpl(DevInfo))
      return std::string("");

    for (auto Name : Names) {
      auto InfoKeyMatches = [&](const InfoQueueTy::InfoQueueEntryTy &Info) {
        return Info.Key == Name;
      };
      auto Item = std::find_if(DevInfo.getQueue().begin(),
                               DevInfo.getQueue().end(), InfoKeyMatches);

      if (Item != std::end(DevInfo.getQueue())) {
        return Item->Value;
      }
    }

    return std::string("");
  };

  switch (PropName) {
  case OL_DEVICE_INFO_PLATFORM:
    return ReturnValue(Device->Platform);
  case OL_DEVICE_INFO_TYPE:
    return ReturnValue(OL_DEVICE_TYPE_GPU);
  case OL_DEVICE_INFO_NAME:
    return ReturnValue(GetInfo({"Device Name"}).c_str());
  case OL_DEVICE_INFO_VENDOR:
    return ReturnValue(GetInfo({"Vendor Name"}).c_str());
  case OL_DEVICE_INFO_DRIVER_VERSION:
    return ReturnValue(
        GetInfo({"CUDA Driver Version", "HSA Runtime Version"}).c_str());
  default:
    return OL_ERRC_INVALID_ENUMERATION;
  }

  return OL_SUCCESS;
}

ol_impl_result_t olGetDeviceInfo_impl(ol_device_handle_t Device,
                                      ol_device_info_t PropName,
                                      size_t PropSize, void *PropValue) {
  return olGetDeviceInfoImplDetail(Device, PropName, PropSize, PropValue,
                                   nullptr);
}

ol_impl_result_t olGetDeviceInfoSize_impl(ol_device_handle_t Device,
                                          ol_device_info_t PropName,
                                          size_t *PropSizeRet) {
  return olGetDeviceInfoImplDetail(Device, PropName, 0, nullptr, PropSizeRet);
}

ol_impl_result_t olIterateDevices_impl(ol_device_iterate_cb_t Callback,
                                       void *UserData) {
  for (auto &Platform : Platforms()) {
    for (auto &Device : Platform.Devices) {
      if (!Callback(&Device, UserData)) {
        break;
      }
    }
  }

  return OL_SUCCESS;
}

TargetAllocTy convertOlToPluginAllocTy(ol_alloc_type_t Type) {
  switch (Type) {
  case OL_ALLOC_TYPE_DEVICE:
    return TARGET_ALLOC_DEVICE;
  case OL_ALLOC_TYPE_HOST:
    return TARGET_ALLOC_HOST;
  case OL_ALLOC_TYPE_MANAGED:
  default:
    return TARGET_ALLOC_SHARED;
  }
}

ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
                                 ol_alloc_type_t Type, size_t Size,
                                 void **AllocationOut) {
  auto Alloc =
      Device->Device->dataAlloc(Size, nullptr, convertOlToPluginAllocTy(Type));
  if (!Alloc)
    return {OL_ERRC_OUT_OF_RESOURCES,
            formatv("Could not create allocation on device {0}", Device).str()};

  *AllocationOut = *Alloc;
  allocInfoMap().insert_or_assign(*Alloc, AllocInfo{Device, Type});
  return OL_SUCCESS;
}

ol_impl_result_t olMemFree_impl(void *Address) {
  if (!allocInfoMap().contains(Address))
    return {OL_ERRC_INVALID_ARGUMENT, "Address is not a known allocation"};

  auto AllocInfo = allocInfoMap().at(Address);
  auto Device = AllocInfo.Device;
  auto Type = AllocInfo.Type;

  auto Res =
      Device->Device->dataDelete(Address, convertOlToPluginAllocTy(Type));
  if (Res)
    return {OL_ERRC_OUT_OF_RESOURCES, "Could not free allocation"};

  allocInfoMap().erase(Address);

  return OL_SUCCESS;
}

ol_impl_result_t olCreateQueue_impl(ol_device_handle_t Device,
                                    ol_queue_handle_t *Queue) {
  auto CreatedQueue = std::make_unique<ol_queue_impl_t>(nullptr, Device);
  auto Err = Device->Device->initAsyncInfo(&(CreatedQueue->AsyncInfo));
  if (Err)
    return {OL_ERRC_UNKNOWN, "Could not initialize stream resource"};

  *Queue = CreatedQueue.release();
  return OL_SUCCESS;
}

ol_impl_result_t olDestroyQueue_impl(ol_queue_handle_t Queue) {
  return olDestroy(Queue);
}

ol_impl_result_t olWaitQueue_impl(ol_queue_handle_t Queue) {
  // Host plugin doesn't have a queue set so it's not safe to call synchronize
  // on it, but we have nothing to synchronize in that situation anyway.
  if (Queue->AsyncInfo->Queue) {
    auto Err = Queue->Device->Device->synchronize(Queue->AsyncInfo);
    if (Err)
      return {OL_ERRC_INVALID_QUEUE, "The queue failed to synchronize"};
  }

  // Recreate the stream resource so the queue can be reused
  // TODO: Would be easier for the synchronization to (optionally) not release
  // it to begin with.
  auto Res = Queue->Device->Device->initAsyncInfo(&Queue->AsyncInfo);
  if (Res)
    return {OL_ERRC_UNKNOWN, "Could not reinitialize the stream resource"};

  return OL_SUCCESS;
}

ol_impl_result_t olWaitEvent_impl(ol_event_handle_t Event) {
  auto Res = Event->Queue->Device->Device->syncEvent(Event->EventInfo);
  if (Res)
    return {OL_ERRC_INVALID_EVENT, "The event failed to synchronize"};

  return OL_SUCCESS;
}

ol_impl_result_t olDestroyEvent_impl(ol_event_handle_t Event) {
  return olDestroy(Event);
}

ol_event_handle_t makeEvent(ol_queue_handle_t Queue) {
  auto EventImpl = std::make_unique<ol_event_impl_t>(nullptr, Queue);
  auto Res = Queue->Device->Device->createEvent(&EventImpl->EventInfo);
  if (Res)
    return nullptr;

  Res = Queue->Device->Device->recordEvent(EventImpl->EventInfo,
                                           Queue->AsyncInfo);
  if (Res)
    return nullptr;

  return EventImpl.release();
}

ol_impl_result_t olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
                               ol_device_handle_t DstDevice, void *SrcPtr,
                               ol_device_handle_t SrcDevice, size_t Size,
                               ol_event_handle_t *EventOut) {
  if (DstDevice == HostDevice() && SrcDevice == HostDevice()) {
    if (!Queue) {
      std::memcpy(DstPtr, SrcPtr, Size);
      return OL_SUCCESS;
    } else {
      return {OL_ERRC_INVALID_ARGUMENT,
              "One of DstDevice and SrcDevice must be a non-host device if "
              "Queue is specified"};
    }
  }

  // If no queue is given the memcpy will be synchronous
  auto QueueImpl = Queue ? Queue->AsyncInfo : nullptr;

  if (DstDevice == HostDevice()) {
    auto Res = SrcDevice->Device->dataRetrieve(DstPtr, SrcPtr, Size, QueueImpl);
    if (Res)
      return {OL_ERRC_UNKNOWN, "The data retrieve operation failed"};
  } else if (SrcDevice == HostDevice()) {
    auto Res = DstDevice->Device->dataSubmit(DstPtr, SrcPtr, Size, QueueImpl);
    if (Res)
      return {OL_ERRC_UNKNOWN, "The data submit operation failed"};
  } else {
    auto Res = SrcDevice->Device->dataExchange(SrcPtr, *DstDevice->Device,
                                               DstPtr, Size, QueueImpl);
    if (Res)
      return {OL_ERRC_UNKNOWN, "The data exchange operation failed"};
  }

  if (EventOut)
    *EventOut = makeEvent(Queue);

  return OL_SUCCESS;
}

ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device,
                                      const void *ProgData, size_t ProgDataSize,
                                      ol_program_handle_t *Program) {
  // Make a copy of the program binary in case it is released by the caller.
  auto ImageData = MemoryBuffer::getMemBufferCopy(
      StringRef(reinterpret_cast<const char *>(ProgData), ProgDataSize));

  auto DeviceImage = __tgt_device_image{
      const_cast<char *>(ImageData->getBuffer().data()),
      const_cast<char *>(ImageData->getBuffer().data()) + ProgDataSize, nullptr,
      nullptr};

  ol_program_handle_t Prog =
      new ol_program_impl_t(nullptr, std::move(ImageData), DeviceImage);

  auto Res =
      Device->Device->loadBinary(Device->Device->Plugin, &Prog->DeviceImage);
  if (!Res) {
    delete Prog;
    return OL_ERRC_INVALID_VALUE;
  }

  Prog->Image = *Res;
  *Program = Prog;

  return OL_SUCCESS;
}

ol_impl_result_t olDestroyProgram_impl(ol_program_handle_t Program) {
  return olDestroy(Program);
}

ol_impl_result_t olGetKernel_impl(ol_program_handle_t Program,
                                  const char *KernelName,
                                  ol_kernel_handle_t *Kernel) {

  auto &Device = Program->Image->getDevice();
  auto KernelImpl = Device.constructKernel(KernelName);
  if (!KernelImpl)
    return OL_ERRC_INVALID_KERNEL_NAME;

  auto Err = KernelImpl->init(Device, *Program->Image);
  if (Err)
    return {OL_ERRC_UNKNOWN, "Could not initialize the kernel"};

  *Kernel = &*KernelImpl;

  return OL_SUCCESS;
}

ol_impl_result_t
olLaunchKernel_impl(ol_queue_handle_t Queue, ol_device_handle_t Device,
                    ol_kernel_handle_t Kernel, const void *ArgumentsData,
                    size_t ArgumentsSize,
                    const ol_kernel_launch_size_args_t *LaunchSizeArgs,
                    ol_event_handle_t *EventOut) {
  auto *DeviceImpl = Device->Device;
  if (Queue && Device != Queue->Device) {
    return {OL_ERRC_INVALID_DEVICE,
            "Device specified does not match the device of the given queue"};
  }

  auto *QueueImpl = Queue ? Queue->AsyncInfo : nullptr;
  AsyncInfoWrapperTy AsyncInfoWrapper(*DeviceImpl, QueueImpl);
  KernelArgsTy LaunchArgs{};
  LaunchArgs.NumTeams[0] = LaunchSizeArgs->NumGroupsX;
  LaunchArgs.NumTeams[1] = LaunchSizeArgs->NumGroupsY;
  LaunchArgs.NumTeams[2] = LaunchSizeArgs->NumGroupsZ;
  LaunchArgs.ThreadLimit[0] = LaunchSizeArgs->GroupSizeX;
  LaunchArgs.ThreadLimit[1] = LaunchSizeArgs->GroupSizeY;
  LaunchArgs.ThreadLimit[2] = LaunchSizeArgs->GroupSizeZ;
  LaunchArgs.DynCGroupMem = LaunchSizeArgs->DynSharedMemory;

  KernelLaunchParamsTy Params;
  Params.Data = const_cast<void *>(ArgumentsData);
  Params.Size = ArgumentsSize;
  LaunchArgs.ArgPtrs = reinterpret_cast<void **>(&Params);
  // Don't do anything with pointer indirection; use arg data as-is
  LaunchArgs.Flags.IsCUDA = true;

  auto *KernelImpl = reinterpret_cast<GenericKernelTy *>(Kernel);
  auto Err = KernelImpl->launch(*DeviceImpl, LaunchArgs.ArgPtrs, nullptr,
                                LaunchArgs, AsyncInfoWrapper);

  AsyncInfoWrapper.finalize(Err);
  if (Err)
    return {OL_ERRC_UNKNOWN, "Could not finalize the AsyncInfoWrapper"};

  if (EventOut)
    *EventOut = makeEvent(Queue);

  return OL_SUCCESS;
}

} // namespace offload
} // namespace llvm
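For reference, here is a minimal usage sketch of the entry points implemented above. It is not part of the patch and rests on assumptions: that the public `olX` functions forward to the `olX_impl` definitions in this file, that their results can be compared against `OL_SUCCESS`, and that the caller already holds handles for a non-host device and for the host device. The helper name `roundTrip` is illustrative only.

// Hypothetical usage sketch (not part of the patch): copy a host buffer to a
// device and back through a queue, then verify the round trip.
#include <OffloadAPI.h>
#include <cstring>
#include <vector>

bool roundTrip(ol_device_handle_t Device, ol_device_handle_t Host) {
  std::vector<char> Src(1024, 0x2a), Dst(1024, 0);

  // Device-side buffer for the round trip.
  void *DevPtr = nullptr;
  if (olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Src.size(), &DevPtr) !=
      OL_SUCCESS)
    return false;

  ol_queue_handle_t Queue = nullptr;
  if (olCreateQueue(Device, &Queue) != OL_SUCCESS)
    return false;

  // Enqueue host -> device and device -> host copies, then block until the
  // queue has drained. Passing a null queue instead would make each copy
  // synchronous, and a host-to-host copy with a queue is rejected as invalid
  // (see olMemcpy_impl above).
  olMemcpy(Queue, DevPtr, Device, Src.data(), Host, Src.size(), nullptr);
  olMemcpy(Queue, Dst.data(), Host, DevPtr, Device, Dst.size(), nullptr);
  olWaitQueue(Queue);

  bool Ok = std::memcmp(Src.data(), Dst.data(), Src.size()) == 0;
  olDestroyQueue(Queue);
  olMemFree(DevPtr);
  return Ok;
}

Note that the free at the end does not need a device handle: olMemFree_impl looks up the owning device and allocation type in allocInfoMap(), which olMemAlloc_impl populated.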