Files
clang-p2996/openmp/libomptarget/plugins-nextgen/common/PluginInterface/RPC.cpp
Joseph Huber 8a0763f19c [Libomptarget] Remove RPCHandleTy indirection
The 'RPCHandleTy' was intended to capture the intention that a specific
device owns its slot in the RPC server. However, this required creating
a temporary store to hold these pointers. This was causing really weird
spurious failure due to undefined behaviour in the order of library
teardown. For example, the x64 plugin would be torn down, set this to
some invalid memory, and then the CUDA plugin would crash. Rather than
spend the time to fully diagnose this problem I found it pertinent to
simply remove the failure mode.

This patch removes this indirection so now the usage of the RPC server
must always be done with the intended device. This just requires some
extra handling for the AMDGPU indirection where we need to store a
reference to the device.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D154971
2023-07-11 10:54:40 -05:00

155 lines
5.4 KiB
C++

//===- RPC.h - Interface for remote procedure calls from the GPU ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "RPC.h"
#include "Debug.h"
#include "PluginInterface.h"
// This header file may be present in-tree or from an LLVM installation. The
// installed version lives alongside the GPU headers so we do not want to
// include it directly.
#if __has_include(<gpu-none-llvm/rpc_server.h>)
#include <gpu-none-llvm/rpc_server.h>
#elif defined(LIBOMPTARGET_RPC_SUPPORT)
#include <rpc_server.h>
#endif
using namespace llvm;
using namespace omp;
using namespace target;
RPCServerTy::RPCServerTy(uint32_t NumDevices) {
#ifdef LIBOMPTARGET_RPC_SUPPORT
// If this fails then something is catastrophically wrong, just exit.
if (rpc_status_t Err = rpc_init(NumDevices))
FATAL_MESSAGE(1, "Error initializing the RPC server: %d\n", Err);
#endif
}
llvm::Expected<bool>
RPCServerTy::isDeviceUsingRPC(plugin::GenericDeviceTy &Device,
plugin::GenericGlobalHandlerTy &Handler,
plugin::DeviceImageTy &Image) {
#ifdef LIBOMPTARGET_RPC_SUPPORT
void *ClientPtr;
plugin::GlobalTy Global(rpc_client_symbol_name, sizeof(void *), &ClientPtr);
if (auto Err = Handler.readGlobalFromImage(Device, Image, Global)) {
llvm::consumeError(std::move(Err));
return false;
}
return true;
#else
return false;
#endif
}
Error RPCServerTy::initDevice(plugin::GenericDeviceTy &Device,
plugin::GenericGlobalHandlerTy &Handler,
plugin::DeviceImageTy &Image) {
#ifdef LIBOMPTARGET_RPC_SUPPORT
uint32_t DeviceId = Device.getDeviceId();
auto Alloc = [](uint64_t Size, void *Data) {
plugin::GenericDeviceTy &Device =
*reinterpret_cast<plugin::GenericDeviceTy *>(Data);
return Device.allocate(Size, nullptr, TARGET_ALLOC_HOST);
};
// TODO: Allow the device to declare its requested port count.
if (rpc_status_t Err = rpc_server_init(DeviceId, RPC_MAXIMUM_PORT_COUNT,
Device.getWarpSize(), Alloc, &Device))
return plugin::Plugin::error(
"Failed to initialize RPC server for device %d: %d", DeviceId, Err);
// Register a custom opcode handler to perform plugin specific allocation.
// FIXME: We need to make sure this uses asynchronous allocations on CUDA.
auto MallocHandler = [](rpc_port_t Port, void *Data) {
rpc_recv_and_send(
Port,
[](rpc_buffer_t *Buffer, void *Data) {
plugin::GenericDeviceTy &Device =
*reinterpret_cast<plugin::GenericDeviceTy *>(Data);
Buffer->data[0] = reinterpret_cast<uintptr_t>(
Device.allocate(Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE));
},
Data);
};
if (rpc_status_t Err =
rpc_register_callback(DeviceId, RPC_MALLOC, MallocHandler, &Device))
return plugin::Plugin::error(
"Failed to register RPC malloc handler for device %d: %d\n", DeviceId,
Err);
// Register a custom opcode handler to perform plugin specific deallocation.
auto FreeHandler = [](rpc_port_t Port, void *Data) {
rpc_recv(
Port,
[](rpc_buffer_t *Buffer, void *Data) {
plugin::GenericDeviceTy &Device =
*reinterpret_cast<plugin::GenericDeviceTy *>(Data);
Device.free(reinterpret_cast<void *>(Buffer->data[0]),
TARGET_ALLOC_DEVICE);
},
Data);
};
if (rpc_status_t Err =
rpc_register_callback(DeviceId, RPC_FREE, FreeHandler, &Device))
return plugin::Plugin::error(
"Failed to register RPC free handler for device %d: %d\n", DeviceId,
Err);
// Get the address of the RPC client from the device.
void *ClientPtr;
plugin::GlobalTy ClientGlobal(rpc_client_symbol_name, sizeof(void *));
if (auto Err =
Handler.getGlobalMetadataFromDevice(Device, Image, ClientGlobal))
return Err;
if (auto Err = Device.dataRetrieve(&ClientPtr, ClientGlobal.getPtr(),
sizeof(void *), nullptr))
return Err;
const void *ClientBuffer = rpc_get_client_buffer(DeviceId);
if (auto Err = Device.dataSubmit(ClientPtr, ClientBuffer,
rpc_get_client_size(), nullptr))
return Err;
#endif
return Error::success();
}
Error RPCServerTy::runServer(plugin::GenericDeviceTy &Device) {
#ifdef LIBOMPTARGET_RPC_SUPPORT
if (rpc_status_t Err = rpc_handle_server(Device.getDeviceId()))
return plugin::Plugin::error(
"Error while running RPC server on device %d: %d", Device.getDeviceId(),
Err);
#endif
return Error::success();
}
Error RPCServerTy::deinitDevice(plugin::GenericDeviceTy &Device) {
#ifdef LIBOMPTARGET_RPC_SUPPORT
auto Dealloc = [](void *Ptr, void *Data) {
plugin::GenericDeviceTy &Device =
*reinterpret_cast<plugin::GenericDeviceTy *>(Data);
Device.free(Ptr, TARGET_ALLOC_HOST);
};
if (rpc_status_t Err =
rpc_server_shutdown(Device.getDeviceId(), Dealloc, &Device))
return plugin::Plugin::error(
"Failed to shut down RPC server for device %d: %d",
Device.getDeviceId(), Err);
#endif
return Error::success();
}
RPCServerTy::~RPCServerTy() {
#ifdef LIBOMPTARGET_RPC_SUPPORT
rpc_shutdown();
#endif
}