[Offload] Use new error code handling mechanism and lower-case messages (#139275)
This removes the old ErrorCode-less error method and requires every caller to provide a concrete error code. All call sites have been updated. In addition, for consistency with error messages elsewhere in LLVM, all messages now start with a lower-case letter.
@@ -14,24 +14,38 @@
// To add new error codes, add them to offload/liboffload/API/Common.td and run
// the GenerateOffload target.

OFFLOAD_ERRC(SUCCESS, "Success", 0)
OFFLOAD_ERRC(UNKNOWN, "Unknown or internal error", 1)
OFFLOAD_ERRC(SUCCESS, "success", 0)
OFFLOAD_ERRC(UNKNOWN, "unknown or internal error", 1)
OFFLOAD_ERRC(HOST_IO, "I/O error on host", 2)
OFFLOAD_ERRC(INVALID_BINARY, "a provided binary image is malformed", 3)
OFFLOAD_ERRC(INVALID_NULL_POINTER,
"A pointer argument is null when it should not be", 2)
OFFLOAD_ERRC(INVALID_ARGUMENT, "An argument is invalid", 3)
OFFLOAD_ERRC(OUT_OF_RESOURCES, "Out of resources", 4)
OFFLOAD_ERRC(UNSUPPORTED,
"generic error code for unsupported features and enums", 5)
"a pointer argument is null when it should not be", 4)
OFFLOAD_ERRC(INVALID_ARGUMENT, "an argument is invalid", 5)
OFFLOAD_ERRC(NOT_FOUND, "requested object was not found in the binary image", 6)
OFFLOAD_ERRC(OUT_OF_RESOURCES, "out of resources", 7)
OFFLOAD_ERRC(
INVALID_SIZE,
"invalid size or dimensions (e.g., must not be zero, or is out of bounds)",
6)
OFFLOAD_ERRC(INVALID_ENUMERATION, "enumerator argument is not valid", 7)
OFFLOAD_ERRC(INVALID_KERNEL_NAME,
"Named kernel not found in the program binary", 8)
OFFLOAD_ERRC(INVALID_VALUE, "Invalid Value", 9)
OFFLOAD_ERRC(INVALID_PLATFORM, "Invalid platform", 10)
OFFLOAD_ERRC(INVALID_DEVICE, "Invalid device", 11)
OFFLOAD_ERRC(INVALID_QUEUE, "Invalid queue", 12)
OFFLOAD_ERRC(INVALID_EVENT, "Invalid event", 13)
OFFLOAD_ERRC(INVALID_NULL_HANDLE, "handle argument is not valid", 14)
8)
OFFLOAD_ERRC(INVALID_ENUMERATION, "enumerator argument is not valid", 9)
OFFLOAD_ERRC(HOST_TOOL_NOT_FOUND,
"a required binary (linker, etc.) was not found on the host", 10)
OFFLOAD_ERRC(INVALID_VALUE, "invalid value", 11)
OFFLOAD_ERRC(UNIMPLEMENTED,
"generic error code for features currently unimplemented by the "
"device/backend",
12)
OFFLOAD_ERRC(
UNSUPPORTED,
"generic error code for features unsupported by the device/backend", 13)
OFFLOAD_ERRC(ASSEMBLE_FAILURE,
"assembler failure while processing binary image", 14)
OFFLOAD_ERRC(LINK_FAILURE, "linker failure while processing binary image", 15)
OFFLOAD_ERRC(BACKEND_FAILURE,
"the plugin backend is in an invalid or unsupported state", 16)
OFFLOAD_ERRC(INVALID_NULL_HANDLE,
"a handle argument is null when it should not be", 17)
OFFLOAD_ERRC(INVALID_PLATFORM, "invalid platform", 18)
OFFLOAD_ERRC(INVALID_DEVICE, "invalid device", 19)
OFFLOAD_ERRC(INVALID_QUEUE, "invalid queue", 20)
OFFLOAD_ERRC(INVALID_EVENT, "invalid event", 21)

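For orientation: the file above is a generated X-macro list (per its own comment, regenerated from offload/liboffload/API/Common.td via the GenerateOffload target). Below is a minimal sketch of how such a list is typically consumed; the include name and the wrapper enum are assumptions for the example, not part of this patch.

```cpp
// Illustrative sketch only: standard X-macro expansion of an error-code .inc
// file like the one above. "OffloadErrcodes.inc" is an assumed file name.
enum class OffloadErrc : int {
#define OFFLOAD_ERRC(Name, Desc, Value) Name = Value,
#include "OffloadErrcodes.inc"
#undef OFFLOAD_ERRC
};

// Map each code back to its human-readable description string.
inline const char *offloadErrcDescription(OffloadErrc Code) {
  switch (Code) {
#define OFFLOAD_ERRC(Name, Desc, Value)                                       \
  case OffloadErrc::Name:                                                     \
    return Desc;
#include "OffloadErrcodes.inc"
#undef OFFLOAD_ERRC
  }
  return "unknown error code";
}
```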
@@ -46,6 +46,42 @@ public:
// The definition for this resides in the plugin static library
static char ID;
};

/// Create an Offload error.
template <typename... ArgsTy>
static llvm::Error createOffloadError(error::ErrorCode Code, const char *ErrFmt,
ArgsTy... Args) {
std::string Buffer;
llvm::raw_string_ostream(Buffer) << llvm::format(ErrFmt, Args...);
return llvm::make_error<error::OffloadError>(Code, Buffer);
}

inline llvm::Error createOffloadError(error::ErrorCode Code, const char *S) {
return llvm::make_error<error::OffloadError>(Code, S);
}

// The OffloadError will have a message of either:
// * "{Context}: {Message}" if the other error is a StringError
// * "{Context}" otherwise
inline llvm::Error createOffloadError(error::ErrorCode Code,
llvm::Error &&OtherError,
const char *Context) {
std::string Buffer{Context};
llvm::raw_string_ostream buffer(Buffer);

handleAllErrors(
std::move(OtherError),
[&](llvm::StringError &Err) {
buffer << ": ";
buffer << Err.getMessage();
},
[&](llvm::ErrorInfoBase &Err) {
// Non-string error message don't add anything to the offload error's
// error message
});

return llvm::make_error<error::OffloadError>(Code, Buffer);
}
} // namespace error

#endif

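For orientation, here is a minimal sketch of how the three createOffloadError overloads added above might be used from plugin code. The helper names and error codes come from the hunk above; the surrounding checkImage function and parseDeviceImage are hypothetical, introduced only for the example.

```cpp
// Hypothetical caller; assumes the Offload headers declaring error::ErrorCode,
// error::OffloadError and createOffloadError are available.
llvm::Error checkImage(const void *Image, size_t Size) {
  if (!Image)
    // Plain-string overload.
    return createOffloadError(error::ErrorCode::INVALID_NULL_POINTER,
                              "image pointer is null");
  if (Size == 0)
    // printf-style overload.
    return createOffloadError(error::ErrorCode::INVALID_SIZE,
                              "invalid image size %zu", Size);
  if (llvm::Error E = parseDeviceImage(Image, Size)) // hypothetical helper
    // Wrapping overload: if E is a StringError, the resulting message reads
    // "while parsing the device image: <original message>".
    return createOffloadError(error::ErrorCode::INVALID_BINARY, std::move(E),
                              "while parsing the device image");
  return llvm::Error::success();
}
```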
@@ -87,25 +87,32 @@ def ErrorCode : Enum {
let name = "ol_errc_t";
let desc = "Defines Return/Error codes";
let etors =[
Etor<"SUCCESS", "Success">,
Etor<"SUCCESS", "success">,

// Universal errors
Etor<"UNKNOWN", "Unknown or internal error">,
Etor<"INVALID_NULL_POINTER", "A pointer argument is null when it should not be">,
Etor<"INVALID_ARGUMENT", "An argument is invalid">,
Etor<"OUT_OF_RESOURCES", "Out of resources">,
Etor<"UNSUPPORTED", "generic error code for unsupported features and enums">,
Etor<"UNKNOWN", "unknown or internal error">,
Etor<"HOST_IO", "I/O error on host">,
Etor<"INVALID_BINARY", "a provided binary image is malformed">,
Etor<"INVALID_NULL_POINTER", "a pointer argument is null when it should not be">,
Etor<"INVALID_ARGUMENT", "an argument is invalid">,
Etor<"NOT_FOUND", "requested object was not found in the binary image">,
Etor<"OUT_OF_RESOURCES", "out of resources">,
Etor<"INVALID_SIZE", "invalid size or dimensions (e.g., must not be zero, or is out of bounds)">,
Etor<"INVALID_ENUMERATION", "enumerator argument is not valid">,
Etor<"INVALID_KERNEL_NAME", "Named kernel not found in the program binary">,
Etor<"HOST_TOOL_NOT_FOUND", "a required binary (linker, etc.) was not found on the host">,
Etor<"INVALID_VALUE", "invalid value">,
Etor<"UNIMPLEMENTED", "generic error code for features currently unimplemented by the device/backend">,
Etor<"UNSUPPORTED", "generic error code for features unsupported by the device/backend">,
Etor<"ASSEMBLE_FAILURE", "assembler failure while processing binary image">,
Etor<"LINK_FAILURE", "linker failure while processing binary image">,
Etor<"BACKEND_FAILURE", "the plugin backend is in an invalid or unsupported state">,

// Handle related errors - only makes sense for liboffload
Etor<"INVALID_VALUE", "Invalid Value">,
Etor<"INVALID_PLATFORM", "Invalid platform">,
Etor<"INVALID_DEVICE", "Invalid device">,
Etor<"INVALID_QUEUE", "Invalid queue">,
Etor<"INVALID_EVENT", "Invalid event">,
Etor<"INVALID_NULL_HANDLE", "handle argument is not valid">
Etor<"INVALID_NULL_HANDLE", "a handle argument is null when it should not be">,
Etor<"INVALID_PLATFORM", "invalid platform">,
Etor<"INVALID_DEVICE", "invalid device">,
Etor<"INVALID_QUEUE", "invalid queue">,
Etor<"INVALID_EVENT", "invalid event">,
];
}

@@ -20,36 +20,51 @@ extern "C" {
///////////////////////////////////////////////////////////////////////////////
/// @brief Defines Return/Error codes
typedef enum ol_errc_t {
/// Success
/// success
OL_ERRC_SUCCESS = 0,
/// Unknown or internal error
/// unknown or internal error
OL_ERRC_UNKNOWN = 1,
/// A pointer argument is null when it should not be
OL_ERRC_INVALID_NULL_POINTER = 2,
/// An argument is invalid
OL_ERRC_INVALID_ARGUMENT = 3,
/// Out of resources
OL_ERRC_OUT_OF_RESOURCES = 4,
/// generic error code for unsupported features and enums
OL_ERRC_UNSUPPORTED = 5,
/// I/O error on host
OL_ERRC_HOST_IO = 2,
/// a provided binary image is malformed
OL_ERRC_INVALID_BINARY = 3,
/// a pointer argument is null when it should not be
OL_ERRC_INVALID_NULL_POINTER = 4,
/// an argument is invalid
OL_ERRC_INVALID_ARGUMENT = 5,
/// requested object was not found in the binary image
OL_ERRC_NOT_FOUND = 6,
/// out of resources
OL_ERRC_OUT_OF_RESOURCES = 7,
/// invalid size or dimensions (e.g., must not be zero, or is out of bounds)
OL_ERRC_INVALID_SIZE = 6,
OL_ERRC_INVALID_SIZE = 8,
/// enumerator argument is not valid
OL_ERRC_INVALID_ENUMERATION = 7,
/// Named kernel not found in the program binary
OL_ERRC_INVALID_KERNEL_NAME = 8,
/// Invalid Value
OL_ERRC_INVALID_VALUE = 9,
/// Invalid platform
OL_ERRC_INVALID_PLATFORM = 10,
/// Invalid device
OL_ERRC_INVALID_DEVICE = 11,
/// Invalid queue
OL_ERRC_INVALID_QUEUE = 12,
/// Invalid event
OL_ERRC_INVALID_EVENT = 13,
/// handle argument is not valid
OL_ERRC_INVALID_NULL_HANDLE = 14,
OL_ERRC_INVALID_ENUMERATION = 9,
/// a required binary (linker, etc.) was not found on the host
OL_ERRC_HOST_TOOL_NOT_FOUND = 10,
/// invalid value
OL_ERRC_INVALID_VALUE = 11,
/// generic error code for features currently unimplemented by the
/// device/backend
OL_ERRC_UNIMPLEMENTED = 12,
/// generic error code for features unsupported by the device/backend
OL_ERRC_UNSUPPORTED = 13,
/// assembler failure while processing binary image
OL_ERRC_ASSEMBLE_FAILURE = 14,
/// linker failure while processing binary image
OL_ERRC_LINK_FAILURE = 15,
/// the plugin backend is in an invalid or unsupported state
OL_ERRC_BACKEND_FAILURE = 16,
/// a handle argument is null when it should not be
OL_ERRC_INVALID_NULL_HANDLE = 17,
/// invalid platform
OL_ERRC_INVALID_PLATFORM = 18,
/// invalid device
OL_ERRC_INVALID_DEVICE = 19,
/// invalid queue
OL_ERRC_INVALID_QUEUE = 20,
/// invalid event
OL_ERRC_INVALID_EVENT = 21,
/// @cond
OL_ERRC_FORCE_UINT32 = 0x7fffffff
/// @endcond

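As a small worked illustration of the renumbered public enum, the sketch below groups some of the new codes by who is at fault. It is not part of the header; it only references enumerators declared above, and the grouping is the author's own reading, not a normative classification.

```cpp
// Sketch only: coarse classification of ol_errc_t values from the enum above.
static const char *classifyOffloadError(ol_errc_t Code) {
  switch (Code) {
  case OL_ERRC_SUCCESS:
    return "ok";
  case OL_ERRC_HOST_IO:
  case OL_ERRC_HOST_TOOL_NOT_FOUND:
    return "host-side failure";
  case OL_ERRC_INVALID_NULL_POINTER:
  case OL_ERRC_INVALID_NULL_HANDLE:
  case OL_ERRC_INVALID_ARGUMENT:
  case OL_ERRC_INVALID_SIZE:
  case OL_ERRC_INVALID_ENUMERATION:
    return "bad argument from the caller";
  case OL_ERRC_UNIMPLEMENTED:
  case OL_ERRC_UNSUPPORTED:
    return "feature not available";
  default:
    return "runtime or backend failure";
  }
}
```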
@@ -52,30 +52,54 @@ inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
case OL_ERRC_UNKNOWN:
os << "OL_ERRC_UNKNOWN";
break;
case OL_ERRC_HOST_IO:
os << "OL_ERRC_HOST_IO";
break;
case OL_ERRC_INVALID_BINARY:
os << "OL_ERRC_INVALID_BINARY";
break;
case OL_ERRC_INVALID_NULL_POINTER:
os << "OL_ERRC_INVALID_NULL_POINTER";
break;
case OL_ERRC_INVALID_ARGUMENT:
os << "OL_ERRC_INVALID_ARGUMENT";
break;
case OL_ERRC_NOT_FOUND:
os << "OL_ERRC_NOT_FOUND";
break;
case OL_ERRC_OUT_OF_RESOURCES:
os << "OL_ERRC_OUT_OF_RESOURCES";
break;
case OL_ERRC_UNSUPPORTED:
os << "OL_ERRC_UNSUPPORTED";
break;
case OL_ERRC_INVALID_SIZE:
os << "OL_ERRC_INVALID_SIZE";
break;
case OL_ERRC_INVALID_ENUMERATION:
os << "OL_ERRC_INVALID_ENUMERATION";
break;
case OL_ERRC_INVALID_KERNEL_NAME:
os << "OL_ERRC_INVALID_KERNEL_NAME";
case OL_ERRC_HOST_TOOL_NOT_FOUND:
os << "OL_ERRC_HOST_TOOL_NOT_FOUND";
break;
case OL_ERRC_INVALID_VALUE:
os << "OL_ERRC_INVALID_VALUE";
break;
case OL_ERRC_UNIMPLEMENTED:
os << "OL_ERRC_UNIMPLEMENTED";
break;
case OL_ERRC_UNSUPPORTED:
os << "OL_ERRC_UNSUPPORTED";
break;
case OL_ERRC_ASSEMBLE_FAILURE:
os << "OL_ERRC_ASSEMBLE_FAILURE";
break;
case OL_ERRC_LINK_FAILURE:
os << "OL_ERRC_LINK_FAILURE";
break;
case OL_ERRC_BACKEND_FAILURE:
os << "OL_ERRC_BACKEND_FAILURE";
break;
case OL_ERRC_INVALID_NULL_HANDLE:
os << "OL_ERRC_INVALID_NULL_HANDLE";
break;
case OL_ERRC_INVALID_PLATFORM:
os << "OL_ERRC_INVALID_PLATFORM";
break;

@@ -88,9 +112,6 @@ inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
case OL_ERRC_INVALID_EVENT:
os << "OL_ERRC_INVALID_EVENT";
break;
case OL_ERRC_INVALID_NULL_HANDLE:
os << "OL_ERRC_INVALID_NULL_HANDLE";
break;
default:
os << "unknown enumerator";
break;

@@ -482,7 +482,7 @@ ol_impl_result_t olGetKernel_impl(ol_program_handle_t Program,
auto &Device = Program->Image->getDevice();
auto KernelImpl = Device.constructKernel(KernelName);
if (!KernelImpl)
return OL_ERRC_INVALID_KERNEL_NAME;
return ol_impl_result_t::fromError(KernelImpl.takeError());

auto Err = KernelImpl->init(Device, *Program->Image);
if (Err)

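The hunk above replaces a hard-coded OL_ERRC_INVALID_KERNEL_NAME with ol_impl_result_t::fromError, which lets the error code attached at the point of failure flow through to the API boundary. The commit does not show fromError's body; the sketch below is a hypothetical illustration of the kind of folding such an adapter has to do. foldToErrc and the getErrorCode() accessor are assumptions, not the real liboffload code.

```cpp
// Hypothetical sketch: fold an llvm::Error carrying an error::OffloadError
// back into an ol_errc_t. Error codes and ol_errc_t are generated from the
// same Common.td, so the numeric values are assumed to line up.
static ol_errc_t foldToErrc(llvm::Error &&Err) {
  if (!Err)
    return OL_ERRC_SUCCESS;
  ol_errc_t Result = OL_ERRC_UNKNOWN;
  llvm::handleAllErrors(
      std::move(Err),
      [&](error::OffloadError &OffloadErr) {
        Result = static_cast<ol_errc_t>(OffloadErr.getErrorCode()); // accessor name assumed
      },
      [](llvm::ErrorInfoBase &) { /* other error kinds stay OL_ERRC_UNKNOWN */ });
  return Result;
}
```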
@@ -538,9 +538,9 @@ Expected<DeviceTy &> PluginManager::getDevice(uint32_t DeviceNo) {
{
auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor();
if (DeviceNo >= ExclusiveDevicesAccessor->size())
return createStringError(
inconvertibleErrorCode(),
"Device number '%i' out of range, only %i devices available",
return error::createOffloadError(
error::ErrorCode::INVALID_VALUE,
"device number '%i' out of range, only %i devices available",
DeviceNo, ExclusiveDevicesAccessor->size());

DevicePtr = &*(*ExclusiveDevicesAccessor)[DeviceNo];

@@ -549,8 +549,8 @@ Expected<DeviceTy &> PluginManager::getDevice(uint32_t DeviceNo) {
// Check whether global data has been mapped for this device
if (DevicePtr->hasPendingImages())
if (loadImagesOntoDevice(*DevicePtr) != OFFLOAD_SUCCESS)
return createStringError(inconvertibleErrorCode(),
"Failed to load images on device '%i'",
DeviceNo);
return error::createOffloadError(error::ErrorCode::BACKEND_FAILURE,
"failed to load images on device '%i'",
DeviceNo);
return *DevicePtr;
}

@@ -79,9 +79,9 @@ DeviceTy::~DeviceTy() {
llvm::Error DeviceTy::init() {
int32_t Ret = RTL->init_device(RTLDeviceID);
if (Ret != OFFLOAD_SUCCESS)
return llvm::createStringError(llvm::inconvertibleErrorCode(),
"Failed to initialize device %d\n",
DeviceID);
return error::createOffloadError(error::ErrorCode::BACKEND_FAILURE,
"failed to initialize device %d\n",
DeviceID);

// Enables recording kernels if set.
BoolEnvar OMPX_RecordKernel("LIBOMPTARGET_RECORD", false);

@@ -103,8 +103,8 @@ DeviceTy::loadBinary(__tgt_device_image *Img) {
__tgt_device_binary Binary;

if (RTL->load_binary(RTLDeviceID, Img, &Binary) != OFFLOAD_SUCCESS)
return llvm::createStringError(llvm::inconvertibleErrorCode(),
"Failed to load binary %p", Img);
return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
"failed to load binary %p", Img);
return Binary;
}

@@ -30,6 +30,7 @@ typedef enum {
HSA_STATUS_INFO_BREAK = 0x1,
HSA_STATUS_ERROR = 0x1000,
HSA_STATUS_ERROR_INVALID_CODE_OBJECT = 0x1010,
HSA_STATUS_ERROR_INVALID_SYMBOL_NAME = 0x1013,
HSA_STATUS_ERROR_NOT_INITIALIZED = 0x100B,
HSA_STATUS_ERROR_EXCEPTION = 0x1016,
} hsa_status_t;

@@ -75,6 +75,8 @@
#include "hsa/hsa_ext_amd.h"
#endif

using namespace error;

namespace llvm {
namespace omp {
namespace target {

@@ -132,14 +134,14 @@ hsa_status_t iterate(IterFuncTy Func, IterFuncArgTy FuncArg, CallbackTy Cb) {
/// Iterate agents.
template <typename CallbackTy> Error iterateAgents(CallbackTy Callback) {
hsa_status_t Status = iterate<hsa_agent_t>(hsa_iterate_agents, Callback);
return Plugin::check(Status, "Error in hsa_iterate_agents: %s");
return Plugin::check(Status, "error in hsa_iterate_agents: %s");
}

/// Iterate ISAs of an agent.
template <typename CallbackTy>
Error iterateAgentISAs(hsa_agent_t Agent, CallbackTy Cb) {
hsa_status_t Status = iterate<hsa_isa_t>(hsa_agent_iterate_isas, Agent, Cb);
return Plugin::check(Status, "Error in hsa_agent_iterate_isas: %s");
return Plugin::check(Status, "error in hsa_agent_iterate_isas: %s");
}

/// Iterate memory pools of an agent.

@@ -148,7 +150,7 @@ Error iterateAgentMemoryPools(hsa_agent_t Agent, CallbackTy Cb) {
hsa_status_t Status = iterate<hsa_amd_memory_pool_t>(
hsa_amd_agent_iterate_memory_pools, Agent, Cb);
return Plugin::check(Status,
"Error in hsa_amd_agent_iterate_memory_pools: %s");
"error in hsa_amd_agent_iterate_memory_pools: %s");
}

/// Dispatches an asynchronous memory copy.

@@ -161,13 +163,14 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent,
hsa_status_t S =
hsa_amd_memory_async_copy(Dst, DstAgent, Src, SrcAgent, Size,
NumDepSignals, DepSignals, CompletionSignal);
return Plugin::check(S, "Error in hsa_amd_memory_async_copy: %s");
return Plugin::check(S, "error in hsa_amd_memory_async_copy: %s");
}

// This solution is probably not the best
#if !(HSA_AMD_INTERFACE_VERSION_MAJOR >= 1 && \
HSA_AMD_INTERFACE_VERSION_MINOR >= 2)
return Plugin::error("Async copy on selected SDMA requires ROCm 5.7");
return Plugin::error(ErrorCode::UNSUPPORTED,
"async copy on selected SDMA requires ROCm 5.7");
#else
static std::atomic<int> SdmaEngine{1};

@@ -186,7 +189,7 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent,
LocalSdmaEngine = (LocalSdmaEngine << 1) % 3;
SdmaEngine.store(LocalSdmaEngine, std::memory_order_relaxed);

return Plugin::check(S, "Error in hsa_amd_memory_async_copy_on_engine: %s");
return Plugin::check(S, "error in hsa_amd_memory_async_copy_on_engine: %s");
#endif
}

@@ -237,7 +240,8 @@ struct AMDGPUResourceRef : public GenericDeviceResourceRef {
/// reference must be to a valid resource before calling to this function.
Error destroy(GenericDeviceTy &Device) override {
if (!Resource)
return Plugin::error("Destroying an invalid resource");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"destroying an invalid resource");

if (auto Err = Resource->deinit())
return Err;

@@ -304,13 +308,13 @@ struct AMDGPUMemoryPoolTy {
Error allocate(size_t Size, void **PtrStorage) {
hsa_status_t Status =
hsa_amd_memory_pool_allocate(MemoryPool, Size, 0, PtrStorage);
return Plugin::check(Status, "Error in hsa_amd_memory_pool_allocate: %s");
return Plugin::check(Status, "error in hsa_amd_memory_pool_allocate: %s");
}

/// Return memory to the memory pool.
Error deallocate(void *Ptr) {
hsa_status_t Status = hsa_amd_memory_pool_free(Ptr);
return Plugin::check(Status, "Error in hsa_amd_memory_pool_free: %s");
return Plugin::check(Status, "error in hsa_amd_memory_pool_free: %s");
}

/// Returns if the \p Agent can access the memory pool.

@@ -335,14 +339,15 @@ struct AMDGPUMemoryPoolTy {
// The agent is not allowed to access the memory pool in any case. Do not
// continue because otherwise it result in undefined behavior.
if (Access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED)
return Plugin::error("An agent is not allowed to access a memory pool");
return Plugin::error(ErrorCode::INVALID_VALUE,
"an agent is not allowed to access a memory pool");
}
#endif

// We can access but it is disabled by default. Enable the access then.
hsa_status_t Status =
hsa_amd_agents_allow_access(Agents.size(), Agents.data(), nullptr, Ptr);
return Plugin::check(Status, "Error in hsa_amd_agents_allow_access: %s");
return Plugin::check(Status, "error in hsa_amd_agents_allow_access: %s");
}

/// Get attribute from the memory pool.

@@ -350,7 +355,7 @@ struct AMDGPUMemoryPoolTy {
Error getAttr(hsa_amd_memory_pool_info_t Kind, Ty &Value) const {
hsa_status_t Status;
Status = hsa_amd_memory_pool_get_info(MemoryPool, Kind, &Value);
return Plugin::check(Status, "Error in hsa_amd_memory_pool_get_info: %s");
return Plugin::check(Status, "error in hsa_amd_memory_pool_get_info: %s");
}

template <typename Ty>

@@ -366,7 +371,7 @@ struct AMDGPUMemoryPoolTy {
Status =
hsa_amd_agent_memory_pool_get_info(Agent, MemoryPool, Kind, &Value);
return Plugin::check(Status,
"Error in hsa_amd_agent_memory_pool_get_info: %s");
"error in hsa_amd_agent_memory_pool_get_info: %s");
}

private:

@@ -416,7 +421,8 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {

*PtrStorage = MemoryManager->allocate(Size, nullptr);
if (*PtrStorage == nullptr)
return Plugin::error("Failure to allocate from AMDGPU memory manager");
return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
"failure to allocate from AMDGPU memory manager");

return Plugin::success();
}

@@ -426,7 +432,8 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
assert(Ptr && "Invalid pointer");

if (MemoryManager->free(Ptr))
return Plugin::error("Failure to deallocate from AMDGPU memory manager");
return Plugin::error(ErrorCode::UNKNOWN,
"failure to deallocate from AMDGPU memory manager");

return Plugin::success();
}

@@ -468,7 +475,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
/// Unload the executable.
Error unloadExecutable() {
hsa_status_t Status = hsa_executable_destroy(Executable);
return Plugin::check(Status, "Error in hsa_executable_destroy: %s");
return Plugin::check(Status, "error in hsa_executable_destroy: %s");
}

/// Get the executable.

@@ -534,13 +541,14 @@ struct AMDGPUKernelTy : public GenericKernelTy {
for (auto &Info : RequiredInfos) {
Status = hsa_executable_symbol_get_info(Symbol, Info.first, Info.second);
if (auto Err = Plugin::check(
Status, "Error in hsa_executable_symbol_get_info: %s"))
Status, "error in hsa_executable_symbol_get_info: %s"))
return Err;
}

// Make sure it is a kernel symbol.
if (SymbolType != HSA_SYMBOL_KIND_KERNEL)
return Plugin::error("Symbol %s is not a kernel function");
return Plugin::error(ErrorCode::INVALID_BINARY,
"symbol %s is not a kernel function");

// TODO: Read the kernel descriptor for the max threads per block. May be
// read from the image.

@@ -610,13 +618,13 @@ struct AMDGPUSignalTy {
Error init(uint32_t InitialValue = 1) {
hsa_status_t Status =
hsa_amd_signal_create(InitialValue, 0, nullptr, 0, &HSASignal);
return Plugin::check(Status, "Error in hsa_signal_create: %s");
return Plugin::check(Status, "error in hsa_signal_create: %s");
}

/// Deinitialize the signal.
Error deinit() {
hsa_status_t Status = hsa_signal_destroy(HSASignal);
return Plugin::check(Status, "Error in hsa_signal_destroy: %s");
return Plugin::check(Status, "error in hsa_signal_destroy: %s");
}

/// Wait until the signal gets a zero value.

@@ -688,7 +696,7 @@ struct AMDGPUQueueTy {
hsa_status_t Status =
hsa_queue_create(Agent, QueueSize, HSA_QUEUE_TYPE_MULTI, callbackError,
&Device, UINT32_MAX, UINT32_MAX, &Queue);
return Plugin::check(Status, "Error in hsa_queue_create: %s");
return Plugin::check(Status, "error in hsa_queue_create: %s");
}

/// Deinitialize the queue and destroy its resources.

@@ -697,7 +705,7 @@ struct AMDGPUQueueTy {
if (!Queue)
return Plugin::success();
hsa_status_t Status = hsa_queue_destroy(Queue);
return Plugin::check(Status, "Error in hsa_queue_destroy: %s");
return Plugin::check(Status, "error in hsa_queue_destroy: %s");
}

/// Returns the number of streams, this queue is currently assigned to.

@@ -1115,7 +1123,8 @@ private:
/// Use a barrier packet with two input signals.
Error waitOnStreamOperation(AMDGPUStreamTy &OtherStream, uint32_t Slot) {
if (Queue == nullptr)
return Plugin::error("Target queue was nullptr");
return Plugin::error(ErrorCode::INVALID_NULL_POINTER,
"target queue was nullptr");

/// The signal that we must wait from the other stream.
AMDGPUSignalTy *OtherSignal = OtherStream.Slots[Slot].Signal;

@@ -1236,7 +1245,8 @@ public:
uint32_t GroupSize, uint64_t StackSize,
AMDGPUMemoryManagerTy &MemoryManager) {
if (Queue == nullptr)
return Plugin::error("Target queue was nullptr");
return Plugin::error(ErrorCode::INVALID_NULL_POINTER,
"target queue was nullptr");

// Retrieve an available signal for the operation's output.
AMDGPUSignalTy *OutputSignal = nullptr;

@@ -1367,7 +1377,7 @@ public:
InputSignal->get(), HSA_SIGNAL_CONDITION_EQ, 0, asyncActionCallback,
(void *)&Slots[Curr]);

return Plugin::check(Status, "Error in hsa_amd_signal_async_handler: %s");
return Plugin::check(Status, "error in hsa_amd_signal_async_handler: %s");
}

/// Push an asynchronous memory copy host-to-device involving an unpinned

@@ -1411,7 +1421,7 @@ public:
(void *)&Slots[Curr]);

if (auto Err = Plugin::check(Status,
"Error in hsa_amd_signal_async_handler: %s"))
"error in hsa_amd_signal_async_handler: %s"))
return Err;

// Let's use now the second output signal.

@@ -1553,7 +1563,8 @@ struct AMDGPUEventTy {
std::lock_guard<std::mutex> Lock(Mutex);

if (!RecordedStream)
return Plugin::error("Event does not have any recorded stream");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"event does not have any recorded stream");

// Synchronizing the same stream. Do nothing.
if (RecordedStream == &Stream)

@@ -1942,7 +1953,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
else if (WavefrontSize == 64)
GridValues = getAMDGPUGridValues<64>();
else
return Plugin::error("Unexpected AMDGPU wavefront %d", WavefrontSize);
return Plugin::error(ErrorCode::UNSUPPORTED,
"unexpected AMDGPU wavefront %d", WavefrontSize);

// Get maximum number of workitems per workgroup.
uint16_t WorkgroupMaxDim[3];

@@ -1958,7 +1970,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {

GridValues.GV_Max_Teams = GridMaxDim.x / GridValues.GV_Max_WG_Size;
if (GridValues.GV_Max_Teams == 0)
return Plugin::error("Maximum number of teams cannot be zero");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"maximum number of teams cannot be zero");

// Compute the default number of teams.
uint32_t ComputeUnits = 0;

@@ -2071,7 +2084,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
std::error_code EC = sys::fs::createTemporaryFile("amdgpu-pre-link-jit",
"o", LinkerInputFilePath);
if (EC)
return Plugin::error("Failed to create temporary file for linker");
return Plugin::error(ErrorCode::HOST_IO,
"failed to create temporary file for linker");

// Write the file's contents to the output file.
Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =

@@ -2087,12 +2101,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
EC = sys::fs::createTemporaryFile("amdgpu-pre-link-jit", "so",
LinkerOutputFilePath);
if (EC)
return Plugin::error("Failed to create temporary file for linker");
return Plugin::error(ErrorCode::HOST_IO,
"failed to create temporary file for linker");

const auto &ErrorOrPath = sys::findProgramByName("lld");
if (!ErrorOrPath)
return createStringError(inconvertibleErrorCode(),
"Failed to find `lld` on the PATH.");
return createStringError(ErrorCode::HOST_TOOL_NOT_FOUND,
"failed to find `lld` on the PATH.");

std::string LLDPath = ErrorOrPath.get();
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, getDeviceId(),

@@ -2112,18 +2127,22 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
std::string Error;
int RC = sys::ExecuteAndWait(LLDPath, Args, std::nullopt, {}, 0, 0, &Error);
if (RC)
return Plugin::error("Linking optimized bitcode failed: %s",
return Plugin::error(ErrorCode::LINK_FAILURE,
"linking optimized bitcode failed: %s",
Error.c_str());

auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(LinkerOutputFilePath);
if (!BufferOrErr)
return Plugin::error("Failed to open temporary file for lld");
return Plugin::error(ErrorCode::HOST_IO,
"failed to open temporary file for lld");

// Clean up the temporary files afterwards.
if (sys::fs::remove(LinkerOutputFilePath))
return Plugin::error("Failed to remove temporary output file for lld");
return Plugin::error(ErrorCode::HOST_IO,
"failed to remove temporary output file for lld");
if (sys::fs::remove(LinkerInputFilePath))
return Plugin::error("Failed to remove temporary input file for lld");
return Plugin::error(ErrorCode::HOST_IO,
"failed to remove temporary input file for lld");

return std::move(*BufferOrErr);
}

@@ -2139,7 +2158,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
// Allocate and construct the AMDGPU kernel.
AMDGPUKernelTy *AMDGPUKernel = Plugin.allocate<AMDGPUKernelTy>();
if (!AMDGPUKernel)
return Plugin::error("Failed to allocate memory for AMDGPU kernel");
return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
"failed to allocate memory for AMDGPU kernel");

new (AMDGPUKernel) AMDGPUKernelTy(Name);

@@ -2275,7 +2295,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {

hsa_status_t Status =
hsa_amd_memory_lock(HstPtr, Size, nullptr, 0, &PinnedPtr);
if (auto Err = Plugin::check(Status, "Error in hsa_amd_memory_lock: %s\n"))
if (auto Err = Plugin::check(Status, "error in hsa_amd_memory_lock: %s\n"))
return std::move(Err);

return PinnedPtr;

@@ -2284,7 +2304,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
/// Unpin the host buffer.
Error dataUnlockImpl(void *HstPtr) override {
hsa_status_t Status = hsa_amd_memory_unlock(HstPtr);
return Plugin::check(Status, "Error in hsa_amd_memory_unlock: %s\n");
return Plugin::check(Status, "error in hsa_amd_memory_unlock: %s\n");
}

/// Check through the HSA runtime whether the \p HstPtr buffer is pinned.

@@ -2297,7 +2317,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
hsa_status_t Status = hsa_amd_pointer_info(
HstPtr, &Info, /*Allocator=*/nullptr, /*num_agents_accessible=*/nullptr,
/*accessible=*/nullptr);
if (auto Err = Plugin::check(Status, "Error in hsa_amd_pointer_info: %s"))
if (auto Err = Plugin::check(Status, "error in hsa_amd_pointer_info: %s"))
return std::move(Err);

// The buffer may be locked or allocated through HSA allocators. Assume that

@@ -2342,7 +2362,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status = hsa_amd_memory_lock(const_cast<void *>(HstPtr), Size, nullptr, 0,
&PinnedPtr);
if (auto Err =
Plugin::check(Status, "Error in hsa_amd_memory_lock: %s\n"))
Plugin::check(Status, "error in hsa_amd_memory_lock: %s\n"))
return Err;

AMDGPUSignalTy Signal;

@@ -2361,7 +2381,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Err;

Status = hsa_amd_memory_unlock(const_cast<void *>(HstPtr));
return Plugin::check(Status, "Error in hsa_amd_memory_unlock: %s\n");
return Plugin::check(Status, "error in hsa_amd_memory_unlock: %s\n");
}

// Otherwise, use two-step copy with an intermediate pinned host buffer.

@@ -2402,7 +2422,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Status = hsa_amd_memory_lock(const_cast<void *>(HstPtr), Size, nullptr, 0,
&PinnedPtr);
if (auto Err =
Plugin::check(Status, "Error in hsa_amd_memory_lock: %s\n"))
Plugin::check(Status, "error in hsa_amd_memory_lock: %s\n"))
return Err;

AMDGPUSignalTy Signal;

@@ -2421,7 +2441,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Err;

Status = hsa_amd_memory_unlock(const_cast<void *>(HstPtr));
return Plugin::check(Status, "Error in hsa_amd_memory_unlock: %s\n");
return Plugin::check(Status, "error in hsa_amd_memory_unlock: %s\n");
}

// Otherwise, use two-step copy with an intermediate pinned host buffer.

@@ -2529,7 +2549,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {

/// Synchronize the current thread with the event.
Error syncEventImpl(void *EventPtr) override {
return Plugin::error("Synchronize event not implemented");
return Plugin::error(ErrorCode::UNIMPLEMENTED,
"synchronize event not implemented");
}

/// Print information about the device.

@@ -2774,10 +2795,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (Pool->isGlobal()) {
hsa_status_t Status =
Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, Value);
return Plugin::check(Status, "Error in getting device memory size: %s");
return Plugin::check(Status, "error in getting device memory size: %s");
}
}
return Plugin::error("getDeviceMemorySize:: no global pool");
return Plugin::error(ErrorCode::UNSUPPORTED,
"getDeviceMemorySize:: no global pool");
}

/// AMDGPU-specific function to get device attributes.

@@ -2955,37 +2977,38 @@ Error AMDGPUDeviceImageTy::loadExecutable(const AMDGPUDeviceTy &Device) {
hsa_status_t Status =
hsa_code_object_reader_create_from_memory(getStart(), getSize(), &Reader);
if (auto Err = Plugin::check(
Status, "Error in hsa_code_object_reader_create_from_memory: %s"))
Status, "error in hsa_code_object_reader_create_from_memory: %s"))
return Err;

Status = hsa_executable_create_alt(
HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, "", &Executable);
if (auto Err =
Plugin::check(Status, "Error in hsa_executable_create_alt: %s"))
Plugin::check(Status, "error in hsa_executable_create_alt: %s"))
return Err;

hsa_loaded_code_object_t Object;
Status = hsa_executable_load_agent_code_object(Executable, Device.getAgent(),
Reader, "", &Object);
if (auto Err = Plugin::check(
Status, "Error in hsa_executable_load_agent_code_object: %s"))
Status, "error in hsa_executable_load_agent_code_object: %s"))
return Err;

Status = hsa_executable_freeze(Executable, "");
if (auto Err = Plugin::check(Status, "Error in hsa_executable_freeze: %s"))
if (auto Err = Plugin::check(Status, "error in hsa_executable_freeze: %s"))
return Err;

uint32_t Result;
Status = hsa_executable_validate(Executable, &Result);
if (auto Err = Plugin::check(Status, "Error in hsa_executable_validate: %s"))
if (auto Err = Plugin::check(Status, "error in hsa_executable_validate: %s"))
return Err;

if (Result)
return Plugin::error("Loaded HSA executable does not validate");
return Plugin::error(ErrorCode::INVALID_BINARY,
"loaded HSA executable does not validate");

Status = hsa_code_object_reader_destroy(Reader);
if (auto Err =
Plugin::check(Status, "Error in hsa_code_object_reader_destroy: %s"))
Plugin::check(Status, "error in hsa_code_object_reader_destroy: %s"))
return Err;

if (auto Err = hsa_utils::readAMDGPUMetaDataFromImage(

@@ -3006,7 +3029,7 @@ AMDGPUDeviceImageTy::findDeviceSymbol(GenericDeviceTy &Device,
hsa_status_t Status = hsa_executable_get_symbol_by_name(
Executable, SymbolName.data(), &Agent, &Symbol);
if (auto Err = Plugin::check(
Status, "Error in hsa_executable_get_symbol_by_name(%s): %s",
Status, "error in hsa_executable_get_symbol_by_name(%s): %s",
SymbolName.data()))
return std::move(Err);

@@ -3016,7 +3039,8 @@ AMDGPUDeviceImageTy::findDeviceSymbol(GenericDeviceTy &Device,
template <typename ResourceTy>
Error AMDGPUResourceRef<ResourceTy>::create(GenericDeviceTy &Device) {
if (Resource)
return Plugin::error("Creating an existing resource");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"creating an existing resource");

AMDGPUDeviceTy &AMDGPUDevice = static_cast<AMDGPUDeviceTy &>(Device);

@@ -3065,14 +3089,15 @@ struct AMDGPUGlobalHandlerTy final : public GenericGlobalHandlerTy {
for (auto &Info : RequiredInfos) {
Status = hsa_executable_symbol_get_info(Symbol, Info.first, Info.second);
if (auto Err = Plugin::check(
Status, "Error in hsa_executable_symbol_get_info: %s"))
Status, "error in hsa_executable_symbol_get_info: %s"))
return Err;
}

// Check the size of the symbol.
if (SymbolSize != DeviceGlobal.getSize())
return Plugin::error(
"Failed to load global '%s' due to size mismatch (%zu != %zu)",
ErrorCode::INVALID_BINARY,
"failed to load global '%s' due to size mismatch (%zu != %zu)",
DeviceGlobal.getName().data(), SymbolSize,
(size_t)DeviceGlobal.getSize());

@@ -3110,7 +3135,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
// Register event handler to detect memory errors on the devices.
Status = hsa_amd_register_system_event_handler(eventHandler, this);
if (auto Err = Plugin::check(
Status, "Error in hsa_amd_register_system_event_handler: %s"))
Status, "error in hsa_amd_register_system_event_handler: %s"))
return std::move(Err);

// List of host (CPU) agents.

@@ -3151,7 +3176,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
// There are kernel agents but there is no host agent. That should be
// treated as an error.
if (HostAgents.empty())
return Plugin::error("No AMDGPU host agents");
return Plugin::error(ErrorCode::BACKEND_FAILURE, "no AMDGPU host agents");

// Initialize the host device using host agents.
HostDevice = allocate<AMDHostDeviceTy>();

@@ -3177,7 +3202,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy {

// Finalize the HSA runtime.
hsa_status_t Status = hsa_shut_down();
return Plugin::check(Status, "Error in hsa_shut_down: %s");
return Plugin::check(Status, "error in hsa_shut_down: %s");
}

/// Creates an AMDGPU device.

@@ -3297,7 +3322,7 @@ private:
void *DevicePtr = (void *)Event->memory_fault.virtual_address;
std::string S;
llvm::raw_string_ostream OS(S);
OS << llvm::format("Memory access fault by GPU %" PRIu32
OS << llvm::format("memory access fault by GPU %" PRIu32
" (agent 0x%" PRIx64
") at virtual address %p. Reasons: %s",
Node, Event->memory_fault.agent.handle,

@@ -3310,7 +3335,7 @@ private:

// Abort the execution since we do not recover from this error.
FATAL_MESSAGE(1,
"Memory access fault by GPU %" PRIu32 " (agent 0x%" PRIx64
"memory access fault by GPU %" PRIu32 " (agent 0x%" PRIx64
") at virtual address %p. Reasons: %s",
Node, Event->memory_fault.agent.handle,
(void *)Event->memory_fault.virtual_address,

@@ -3341,7 +3366,8 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
AsyncInfoWrapperTy &AsyncInfoWrapper) const {
if (ArgsSize != LaunchParams.Size &&
ArgsSize != LaunchParams.Size + getImplicitArgsSize())
return Plugin::error("Mismatch of kernel arguments size");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"mismatch of kernel arguments size");

AMDGPUPluginTy &AMDGPUPlugin =
static_cast<AMDGPUPluginTy &>(GenericDevice.Plugin);

@@ -3454,15 +3480,24 @@ template <typename... ArgsTy>
static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) {
hsa_status_t ResultCode = static_cast<hsa_status_t>(Code);
if (ResultCode == HSA_STATUS_SUCCESS || ResultCode == HSA_STATUS_INFO_BREAK)
return Error::success();
return Plugin::success();

const char *Desc = "Unknown error";
const char *Desc = "unknown error";
hsa_status_t Ret = hsa_status_string(ResultCode, &Desc);
if (Ret != HSA_STATUS_SUCCESS)
REPORT("Unrecognized " GETNAME(TARGET_NAME) " error code %d\n", Code);

return createStringError<ArgsTy..., const char *>(inconvertibleErrorCode(),
ErrFmt, Args..., Desc);
// TODO: Add more entries to this switch
ErrorCode OffloadErrCode;
switch (ResultCode) {
case HSA_STATUS_ERROR_INVALID_SYMBOL_NAME:
OffloadErrCode = ErrorCode::NOT_FOUND;
break;
default:
OffloadErrCode = ErrorCode::UNKNOWN;
}

return Plugin::error(OffloadErrCode, ErrFmt, Args..., Desc);
}

void *AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,

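The Plugin::check hunk above translates HSA statuses into concrete Offload error codes; the switch currently maps only HSA_STATUS_ERROR_INVALID_SYMBOL_NAME and is marked TODO. The sketch below shows one way that switch might grow, factored into a helper. The extra mappings are illustrative guesses based on the HSA status names declared earlier in this patch, not part of the commit.

```cpp
// Illustrative only: possible future mapping of HSA statuses to ErrorCode.
static ErrorCode offloadErrorCodeForHSA(hsa_status_t Status) {
  switch (Status) {
  case HSA_STATUS_ERROR_INVALID_SYMBOL_NAME:
    return ErrorCode::NOT_FOUND;        // as in the patch
  case HSA_STATUS_ERROR_INVALID_CODE_OBJECT:
    return ErrorCode::INVALID_BINARY;   // guess: malformed device image
  case HSA_STATUS_ERROR_NOT_INITIALIZED:
    return ErrorCode::BACKEND_FAILURE;  // guess: runtime in a bad state
  default:
    return ErrorCode::UNKNOWN;
  }
}
```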
@@ -3559,7 +3594,7 @@ void AMDGPUQueueTy::callbackError(hsa_status_t Status, hsa_queue_t *Source,
AsyncInfoWrapperMatcher);
}

auto Err = Plugin::check(Status, "Received error in queue %p: %s", Source);
auto Err = Plugin::check(Status, "received error in queue %p: %s", Source);
FATAL_MESSAGE(1, "%s", toString(std::move(Err)).data());
}

@@ -1383,22 +1383,16 @@ static inline Error success() { return Error::success(); }
/// Create an Offload error.
template <typename... ArgsTy>
static Error error(error::ErrorCode Code, const char *ErrFmt, ArgsTy... Args) {
std::string Buffer;
raw_string_ostream(Buffer) << format(ErrFmt, Args...);
return make_error<error::OffloadError>(Code, Buffer);
}

template <typename... ArgsTy>
static Error error(const char *ErrFmt, ArgsTy... Args) {
return error(error::ErrorCode::UNKNOWN, ErrFmt, Args...);
return error::createOffloadError(Code, ErrFmt, Args...);
}

inline Error error(error::ErrorCode Code, const char *S) {
return make_error<error::OffloadError>(Code, S);
}

inline Error error(const char *S) {
return make_error<error::OffloadError>(error::ErrorCode::UNKNOWN, S);
inline Error error(error::ErrorCode Code, Error &&OtherError,
const char *Context) {
return error::createOffloadError(Code, std::move(OtherError), Context);
}

/// Check the plugin-specific error code and return an error or success

@@ -26,13 +26,20 @@ using namespace llvm;
using namespace omp;
using namespace target;
using namespace plugin;
using namespace error;

Expected<std::unique_ptr<ObjectFile>>
GenericGlobalHandlerTy::getELFObjectFile(DeviceImageTy &Image) {
assert(utils::elf::isELF(Image.getMemoryBuffer().getBuffer()) &&
"Input is not an ELF file");

return ELFObjectFileBase::createELFObjectFile(Image.getMemoryBuffer());
auto Expected =
ELFObjectFileBase::createELFObjectFile(Image.getMemoryBuffer());
if (!Expected) {
return Plugin::error(ErrorCode::INVALID_BINARY, Expected.takeError(),
"error parsing binary");
}
return Expected;
}

Error GenericGlobalHandlerTy::moveGlobalBetweenDeviceAndHost(

@@ -112,20 +119,21 @@ Error GenericGlobalHandlerTy::getGlobalMetadataFromImage(
// Search the ELF symbol using the symbol name.
auto SymOrErr = utils::elf::getSymbol(**ELFObj, ImageGlobal.getName());
if (!SymOrErr)
return Plugin::error("Failed ELF lookup of global '%s': %s",
ImageGlobal.getName().data(),
toString(SymOrErr.takeError()).data());
return Plugin::error(
ErrorCode::NOT_FOUND, "failed ELF lookup of global '%s': %s",
ImageGlobal.getName().data(), toString(SymOrErr.takeError()).data());

if (!SymOrErr->has_value())
return Plugin::error("Failed to find global symbol '%s' in the ELF image",
return Plugin::error(ErrorCode::NOT_FOUND,
"failed to find global symbol '%s' in the ELF image",
ImageGlobal.getName().data());

auto AddrOrErr = utils::elf::getSymbolAddress(**SymOrErr);
// Get the section to which the symbol belongs.
if (!AddrOrErr)
return Plugin::error("Failed to get ELF symbol from global '%s': %s",
ImageGlobal.getName().data(),
toString(AddrOrErr.takeError()).data());
return Plugin::error(
ErrorCode::NOT_FOUND, "failed to get ELF symbol from global '%s': %s",
ImageGlobal.getName().data(), toString(AddrOrErr.takeError()).data());

// Setup the global symbol's address and size.
ImageGlobal.setPtr(const_cast<void *>(*AddrOrErr));

@@ -143,7 +151,8 @@ Error GenericGlobalHandlerTy::readGlobalFromImage(GenericDeviceTy &Device,
return Err;

if (ImageGlobal.getSize() != HostGlobal.getSize())
return Plugin::error("Transfer failed because global symbol '%s' has "
return Plugin::error(ErrorCode::INVALID_BINARY,
"transfer failed because global symbol '%s' has "
"%u bytes in the ELF image but %u bytes on the host",
HostGlobal.getName().data(), ImageGlobal.getSize(),
HostGlobal.getSize());

@@ -274,7 +283,8 @@ void GPUProfGlobals::dump() const {

Error GPUProfGlobals::write() const {
if (!__llvm_write_custom_profile)
return Plugin::error("Could not find symbol __llvm_write_custom_profile. "
return Plugin::error(ErrorCode::INVALID_BINARY,
"could not find symbol __llvm_write_custom_profile. "
"The compiler-rt profiling library must be linked for "
"GPU PGO to work.");

@@ -307,7 +317,8 @@ Error GPUProfGlobals::write() const {
TargetTriple.str().c_str(), DataBegin, DataEnd, CountersBegin,
CountersEnd, NamesBegin, NamesEnd, &Version);
if (result != 0)
return Plugin::error("Error writing GPU PGO data to file");
return Plugin::error(ErrorCode::HOST_IO,
"error writing GPU PGO data to file");

return Plugin::success();
}

@@ -62,8 +62,8 @@ createModuleFromMemoryBuffer(std::unique_ptr<MemoryBuffer> &MB,
SMDiagnostic Err;
auto Mod = parseIR(*MB, Err, Context);
if (!Mod)
return make_error<StringError>("Failed to create module",
inconvertibleErrorCode());
return error::createOffloadError(error::ErrorCode::UNKNOWN,
"failed to create module");
return std::move(Mod);
}
Expected<std::unique_ptr<Module>>

@@ -100,7 +100,8 @@ createTargetMachine(Module &M, std::string CPU, unsigned OptLevel) {
std::string Msg;
const Target *T = TargetRegistry::lookupTarget(M.getTargetTriple(), Msg);
if (!T)
return make_error<StringError>(Msg, inconvertibleErrorCode());
return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
Msg.data());

SubtargetFeatures Features;
Features.getDefaultSubtargetFeatures(TT);

@@ -118,8 +119,8 @@ createTargetMachine(Module &M, std::string CPU, unsigned OptLevel) {
T->createTargetMachine(M.getTargetTriple(), CPU, Features.getString(),
Options, RelocModel, CodeModel, CGOptLevel));
if (!TM)
return make_error<StringError>("Failed to create target machine",
inconvertibleErrorCode());
return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
"failed to create target machine");
return std::move(TM);
}

@@ -221,7 +222,8 @@ JITEngine::backend(Module &M, const std::string &ComputeUnitKind,
raw_fd_stream FD(PostOptIRModuleFileName.get(), EC);
if (EC)
return createStringError(
EC, "Could not open %s to write the post-opt IR module\n",
error::ErrorCode::HOST_IO,
"Could not open %s to write the post-opt IR module\n",
PostOptIRModuleFileName.get().c_str());
M.print(FD, nullptr);
}

@@ -42,6 +42,7 @@ using namespace llvm;
using namespace omp;
using namespace target;
using namespace plugin;
using namespace error;

// TODO: Fix any thread safety issues for multi-threaded kernel recording.
namespace llvm::omp::target::plugin {

@@ -94,7 +95,8 @@ private:
return Err;

if (isReplaying() && VAddr != MemoryStart) {
return Plugin::error("Record-Replay cannot assign the"
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"record-Replay cannot assign the"
"requested recorded address (%p, %p)",
VAddr, MemoryStart);
}

@@ -121,7 +123,8 @@ private:
break;
}
if (!MemoryStart)
return Plugin::error("Allocating record/replay memory");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"allocating record/replay memory");

if (VAddr && VAddr != MemoryStart)
MemoryOffset = uintptr_t(VAddr) - uintptr_t(MemoryStart);

@@ -166,7 +169,8 @@ private:

uint64_t DevMemSize;
if (Device->getDeviceMemorySize(DevMemSize))
return Plugin::error("Cannot determine Device Memory Size");
return Plugin::error(ErrorCode::UNKNOWN,
"cannot determine Device Memory Size");

return preAllocateHeuristic(DevMemSize, DeviceMemorySize, ReqVAddr);
}

@@ -1078,7 +1082,8 @@ Error PinnedAllocationMapTy::insertEntry(void *HstPtr, void *DevAccessiblePtr,
// Insert the new entry into the map.
auto Res = Allocs.insert({HstPtr, DevAccessiblePtr, Size, ExternallyLocked});
if (!Res.second)
return Plugin::error("Cannot insert locked buffer entry");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"cannot insert locked buffer entry");

// Check whether the next entry overlaps with the inserted entry.
auto It = std::next(Res.first);

@@ -1087,7 +1092,8 @@ Error PinnedAllocationMapTy::insertEntry(void *HstPtr, void *DevAccessiblePtr,

const EntryTy *NextEntry = &(*It);
if (intersects(NextEntry->HstPtr, NextEntry->Size, HstPtr, Size))
return Plugin::error("Partial overlapping not allowed in locked buffers");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"partial overlapping not allowed in locked buffers");

return Plugin::success();
}

@@ -1098,14 +1104,16 @@ Error PinnedAllocationMapTy::eraseEntry(const EntryTy &Entry) {
// the code more difficult to read.
size_t Erased = Allocs.erase({Entry.HstPtr});
if (!Erased)
return Plugin::error("Cannot erase locked buffer entry");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"cannot erase locked buffer entry");
return Plugin::success();
}

Error PinnedAllocationMapTy::registerEntryUse(const EntryTy &Entry,
void *HstPtr, size_t Size) {
if (!contains(Entry.HstPtr, Entry.Size, HstPtr, Size))
return Plugin::error("Partial overlapping not allowed in locked buffers");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"partial overlapping not allowed in locked buffers");

++Entry.References;
return Plugin::success();

@@ -1113,7 +1121,8 @@ Error PinnedAllocationMapTy::registerEntryUse(const EntryTy &Entry,

Expected<bool> PinnedAllocationMapTy::unregisterEntryUse(const EntryTy &Entry) {
if (Entry.References == 0)
return Plugin::error("Invalid number of references");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"invalid number of references");

// Return whether this was the last user.
return (--Entry.References == 0);

@@ -1131,7 +1140,8 @@ Error PinnedAllocationMapTy::registerHostBuffer(void *HstPtr,
// No pinned allocation should intersect.
const EntryTy *Entry = findIntersecting(HstPtr);
if (Entry)
return Plugin::error("Cannot insert entry due to an existing one");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"cannot insert entry due to an existing one");

// Now insert the new entry.
return insertEntry(HstPtr, DevAccessiblePtr, Size);

@@ -1144,11 +1154,13 @@ Error PinnedAllocationMapTy::unregisterHostBuffer(void *HstPtr) {

const EntryTy *Entry = findIntersecting(HstPtr);
if (!Entry)
return Plugin::error("Cannot find locked buffer");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"cannot find locked buffer");

// The address in the entry should be the same we are unregistering.
if (Entry->HstPtr != HstPtr)
return Plugin::error("Unexpected host pointer in locked buffer entry");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"unexpected host pointer in locked buffer entry");

// Unregister from the entry.
auto LastUseOrErr = unregisterEntryUse(*Entry);

@@ -1157,7 +1169,8 @@ Error PinnedAllocationMapTy::unregisterHostBuffer(void *HstPtr) {

// There should be no other references to the pinned allocation.
if (!(*LastUseOrErr))
return Plugin::error("The locked buffer is still being used");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"the locked buffer is still being used");

// Erase the entry from the map.
return eraseEntry(*Entry);

@@ -1203,7 +1216,8 @@ Error PinnedAllocationMapTy::unlockHostBuffer(void *HstPtr) {

const EntryTy *Entry = findIntersecting(HstPtr);
if (!Entry)
return Plugin::error("Cannot find locked buffer");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"cannot find locked buffer");

// Unregister from the locked buffer. No need to do anything if there are
// others using the allocation.

@@ -1289,7 +1303,8 @@ Error PinnedAllocationMapTy::unlockUnmappedHostBuffer(void *HstPtr) {

// No entry, but the automatic locking is enabled, so this is an error.
if (!Entry)
return Plugin::error("Locked buffer not found");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"locked buffer not found");

// There is entry, so unregister a user and check whether it was the last one.
auto LastUseOrErr = unregisterEntryUse(*Entry);

@@ -1312,7 +1327,8 @@ Error PinnedAllocationMapTy::unlockUnmappedHostBuffer(void *HstPtr) {

Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo) {
if (!AsyncInfo || !AsyncInfo->Queue)
return Plugin::error("Invalid async info queue");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"invalid async info queue");

if (auto Err = synchronizeImpl(*AsyncInfo))
return Err;

@@ -1327,22 +1343,26 @@ Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo) {
|
||||
|
||||
Error GenericDeviceTy::queryAsync(__tgt_async_info *AsyncInfo) {
|
||||
if (!AsyncInfo || !AsyncInfo->Queue)
|
||||
return Plugin::error("Invalid async info queue");
|
||||
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
|
||||
"invalid async info queue");
|
||||
|
||||
return queryAsyncImpl(*AsyncInfo);
|
||||
}
|
||||
|
||||
Error GenericDeviceTy::memoryVAMap(void **Addr, void *VAddr, size_t *RSize) {
|
||||
return Plugin::error("Device does not support VA Management");
|
||||
return Plugin::error(ErrorCode::UNSUPPORTED,
|
||||
"device does not support VA Management");
|
||||
}
|
||||
|
||||
Error GenericDeviceTy::memoryVAUnMap(void *VAddr, size_t Size) {
|
||||
return Plugin::error("Device does not support VA Management");
|
||||
return Plugin::error(ErrorCode::UNSUPPORTED,
|
||||
"device does not support VA Management");
|
||||
}
|
||||
|
||||
Error GenericDeviceTy::getDeviceMemorySize(uint64_t &DSize) {
|
||||
return Plugin::error(
|
||||
"Missing getDeviceMemorySize implementation (required by RR-heuristic");
|
||||
ErrorCode::UNIMPLEMENTED,
|
||||
"missing getDeviceMemorySize implementation (required by RR-heuristic");
|
||||
}
|
||||
|
||||
Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
|
||||
@@ -1359,7 +1379,8 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
|
||||
if (MemoryManager) {
|
||||
Alloc = MemoryManager->allocate(Size, HostPtr);
|
||||
if (!Alloc)
|
||||
return Plugin::error("Failed to allocate from memory manager");
|
||||
return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
|
||||
"failed to allocate from memory manager");
|
||||
break;
|
||||
}
|
||||
[[fallthrough]];
|
||||
@@ -1367,13 +1388,15 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
|
||||
case TARGET_ALLOC_SHARED:
|
||||
Alloc = allocate(Size, HostPtr, Kind);
|
||||
if (!Alloc)
|
||||
return Plugin::error("Failed to allocate from device allocator");
|
||||
return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
|
||||
"failed to allocate from device allocator");
|
||||
}
|
||||
|
||||
// Report error if the memory manager or the device allocator did not return
|
||||
// any memory buffer.
|
||||
if (!Alloc)
|
||||
return Plugin::error("Invalid target data allocation kind or requested "
|
||||
return Plugin::error(ErrorCode::UNIMPLEMENTED,
|
||||
"invalid target data allocation kind or requested "
|
||||
"allocator not implemented yet");
|
||||
|
||||
// Register allocated buffer as pinned memory if the type is host memory.
|
||||
@@ -1448,7 +1471,8 @@ Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
|
||||
Res = MemoryManager->free(TgtPtr);
|
||||
if (Res)
|
||||
return Plugin::error(
|
||||
"Failure to deallocate device pointer %p via memory manager",
|
||||
ErrorCode::OUT_OF_RESOURCES,
|
||||
"failure to deallocate device pointer %p via memory manager",
|
||||
TgtPtr);
|
||||
break;
|
||||
}
|
||||
@@ -1458,7 +1482,8 @@ Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
|
||||
Res = free(TgtPtr, Kind);
|
||||
if (Res)
|
||||
return Plugin::error(
|
||||
"Failure to deallocate device pointer %p via device deallocator",
|
||||
ErrorCode::UNKNOWN,
|
||||
"failure to deallocate device pointer %p via device deallocator",
|
||||
TgtPtr);
|
||||
}
|
||||
|
||||
|
||||
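// Illustrative sketch, not from the patch: the pattern applied throughout this
// file is to name the closest matching ErrorCode at each call site --
// INVALID_ARGUMENT for caller mistakes, OUT_OF_RESOURCES for allocation
// failures, UNSUPPORTED/UNIMPLEMENTED for missing functionality, UNKNOWN when
// nothing better fits -- while keeping the printf-style message, now lower
// case. A minimal sketch of how a new call site would look under this scheme;
// the function and message below are hypothetical.
Error checkCopySize(int64_t Size) {
  if (Size < 0)
    return Plugin::error(ErrorCode::INVALID_ARGUMENT,
                         "copy size must not be negative");
  return Plugin::success();
}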
@@ -176,7 +176,8 @@ Error RPCServerTy::initDevice(plugin::GenericDeviceTy &Device,
TARGET_ALLOC_HOST);
if (!RPCBuffer)
return plugin::Plugin::error(
"Failed to initialize RPC server for device %d", Device.getDeviceId());
error::ErrorCode::UNKNOWN,
"failed to initialize RPC server for device %d", Device.getDeviceId());

// Get the address of the RPC client from the device.
plugin::GlobalTy ClientGlobal("__llvm_rpc_client", sizeof(rpc::Client));

@@ -106,6 +106,7 @@ typedef enum cudaError_enum {
CUDA_ERROR_INVALID_VALUE = 1,
CUDA_ERROR_NO_DEVICE = 100,
CUDA_ERROR_INVALID_HANDLE = 400,
CUDA_ERROR_NOT_FOUND = 500,
CUDA_ERROR_NOT_READY = 600,
CUDA_ERROR_TOO_MANY_PEERS = 711,
} CUresult;

@@ -33,6 +33,8 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Program.h"

using namespace error;

namespace llvm {
namespace omp {
namespace target {
@@ -87,7 +89,7 @@ struct CUDADeviceImageTy : public DeviceImageTy {
assert(!Module && "Module already loaded");

CUresult Res = cuModuleLoadDataEx(&Module, getStart(), 0, nullptr, nullptr);
if (auto Err = Plugin::check(Res, "Error in cuModuleLoadDataEx: %s"))
if (auto Err = Plugin::check(Res, "error in cuModuleLoadDataEx: %s"))
return Err;

return Plugin::success();
@@ -98,7 +100,7 @@ struct CUDADeviceImageTy : public DeviceImageTy {
assert(Module && "Module not loaded");

CUresult Res = cuModuleUnload(Module);
if (auto Err = Plugin::check(Res, "Error in cuModuleUnload: %s"))
if (auto Err = Plugin::check(Res, "error in cuModuleUnload: %s"))
return Err;

Module = nullptr;
@@ -128,18 +130,19 @@ struct CUDAKernelTy : public GenericKernelTy {

// Retrieve the function pointer of the kernel.
Res = cuModuleGetFunction(&Func, CUDAImage.getModule(), getName());
if (auto Err = Plugin::check(Res, "Error in cuModuleGetFunction('%s'): %s",
if (auto Err = Plugin::check(Res, "error in cuModuleGetFunction('%s'): %s",
getName()))
return Err;

// Check that the function pointer is valid.
if (!Func)
return Plugin::error("Invalid function for kernel %s", getName());
return Plugin::error(ErrorCode::INVALID_BINARY,
"invalid function for kernel %s", getName());

int MaxThreads;
Res = cuFuncGetAttribute(&MaxThreads,
CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, Func);
if (auto Err = Plugin::check(Res, "Error in cuFuncGetAttribute: %s"))
if (auto Err = Plugin::check(Res, "error in cuFuncGetAttribute: %s"))
return Err;

// The maximum number of threads cannot exceed the maximum of the kernel.
@@ -175,10 +178,11 @@ struct CUDAStreamRef final : public GenericDeviceResourceRef {
/// before calling to this function.
Error create(GenericDeviceTy &Device) override {
if (Stream)
return Plugin::error("Creating an existing stream");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"creating an existing stream");

CUresult Res = cuStreamCreate(&Stream, CU_STREAM_NON_BLOCKING);
if (auto Err = Plugin::check(Res, "Error in cuStreamCreate: %s"))
if (auto Err = Plugin::check(Res, "error in cuStreamCreate: %s"))
return Err;

return Plugin::success();
@@ -188,10 +192,11 @@ struct CUDAStreamRef final : public GenericDeviceResourceRef {
/// must be to a valid stream before calling to this function.
Error destroy(GenericDeviceTy &Device) override {
if (!Stream)
return Plugin::error("Destroying an invalid stream");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"destroying an invalid stream");

CUresult Res = cuStreamDestroy(Stream);
if (auto Err = Plugin::check(Res, "Error in cuStreamDestroy: %s"))
if (auto Err = Plugin::check(Res, "error in cuStreamDestroy: %s"))
return Err;

Stream = nullptr;
@@ -222,10 +227,11 @@ struct CUDAEventRef final : public GenericDeviceResourceRef {
/// before calling to this function.
Error create(GenericDeviceTy &Device) override {
if (Event)
return Plugin::error("Creating an existing event");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"creating an existing event");

CUresult Res = cuEventCreate(&Event, CU_EVENT_DEFAULT);
if (auto Err = Plugin::check(Res, "Error in cuEventCreate: %s"))
if (auto Err = Plugin::check(Res, "error in cuEventCreate: %s"))
return Err;

return Plugin::success();
@@ -235,10 +241,11 @@ struct CUDAEventRef final : public GenericDeviceResourceRef {
/// must be to a valid event before calling to this function.
Error destroy(GenericDeviceTy &Device) override {
if (!Event)
return Plugin::error("Destroying an invalid event");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"destroying an invalid event");

CUresult Res = cuEventDestroy(Event);
if (auto Err = Plugin::check(Res, "Error in cuEventDestroy: %s"))
if (auto Err = Plugin::check(Res, "error in cuEventDestroy: %s"))
return Err;

Event = nullptr;
@@ -266,7 +273,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
/// Initialize the device, its resources and get its properties.
Error initImpl(GenericPluginTy &Plugin) override {
CUresult Res = cuDeviceGet(&Device, DeviceId);
if (auto Err = Plugin::check(Res, "Error in cuDeviceGet: %s"))
if (auto Err = Plugin::check(Res, "error in cuDeviceGet: %s"))
return Err;

// Query the current flags of the primary context and set its flags if
@@ -276,7 +283,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
Res = cuDevicePrimaryCtxGetState(Device, &FormerPrimaryCtxFlags,
&FormerPrimaryCtxIsActive);
if (auto Err =
Plugin::check(Res, "Error in cuDevicePrimaryCtxGetState: %s"))
Plugin::check(Res, "error in cuDevicePrimaryCtxGetState: %s"))
return Err;

if (FormerPrimaryCtxIsActive) {
@@ -292,14 +299,14 @@ struct CUDADeviceTy : public GenericDeviceTy {
"CU_CTX_SCHED_BLOCKING_SYNC\n");
Res = cuDevicePrimaryCtxSetFlags(Device, CU_CTX_SCHED_BLOCKING_SYNC);
if (auto Err =
Plugin::check(Res, "Error in cuDevicePrimaryCtxSetFlags: %s"))
Plugin::check(Res, "error in cuDevicePrimaryCtxSetFlags: %s"))
return Err;
}

// Retain the per device primary context and save it to use whenever this
// device is selected.
Res = cuDevicePrimaryCtxRetain(&Context, Device);
if (auto Err = Plugin::check(Res, "Error in cuDevicePrimaryCtxRetain: %s"))
if (auto Err = Plugin::check(Res, "error in cuDevicePrimaryCtxRetain: %s"))
return Err;

if (auto Err = setContext())
@@ -382,7 +389,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
if (Context) {
CUresult Res = cuDevicePrimaryCtxRelease(Device);
if (auto Err =
Plugin::check(Res, "Error in cuDevicePrimaryCtxRelease: %s"))
Plugin::check(Res, "error in cuDevicePrimaryCtxRelease: %s"))
return Err;
}

@@ -419,7 +426,8 @@ struct CUDADeviceTy : public GenericDeviceTy {
std::error_code EC = sys::fs::createTemporaryFile("nvptx-pre-link-jit", "s",
PTXInputFilePath);
if (EC)
return Plugin::error("Failed to create temporary file for ptxas");
return Plugin::error(ErrorCode::HOST_IO,
"failed to create temporary file for ptxas");

// Write the file's contents to the output file.
Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
@@ -435,12 +443,14 @@ struct CUDADeviceTy : public GenericDeviceTy {
EC = sys::fs::createTemporaryFile("nvptx-post-link-jit", "cubin",
PTXOutputFilePath);
if (EC)
return Plugin::error("Failed to create temporary file for ptxas");
return Plugin::error(ErrorCode::HOST_IO,
"failed to create temporary file for ptxas");

// Try to find `ptxas` in the path to compile the PTX to a binary.
const auto ErrorOrPath = sys::findProgramByName("ptxas");
if (!ErrorOrPath)
return Plugin::error("Failed to find 'ptxas' on the PATH.");
return Plugin::error(ErrorCode::HOST_TOOL_NOT_FOUND,
"failed to find 'ptxas' on the PATH.");

std::string Arch = getComputeUnitKind();
StringRef Args[] = {*ErrorOrPath,
@@ -455,17 +465,21 @@ struct CUDADeviceTy : public GenericDeviceTy {
std::string ErrMsg;
if (sys::ExecuteAndWait(*ErrorOrPath, Args, std::nullopt, {}, 0, 0,
&ErrMsg))
return Plugin::error("Running 'ptxas' failed: %s\n", ErrMsg.c_str());
return Plugin::error(ErrorCode::ASSEMBLE_FAILURE,
"running 'ptxas' failed: %s\n", ErrMsg.c_str());

auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(PTXOutputFilePath.data());
if (!BufferOrErr)
return Plugin::error("Failed to open temporary file for ptxas");
return Plugin::error(ErrorCode::HOST_IO,
"failed to open temporary file for ptxas");

// Clean up the temporary files afterwards.
if (sys::fs::remove(PTXOutputFilePath))
return Plugin::error("Failed to remove temporary file for ptxas");
return Plugin::error(ErrorCode::HOST_IO,
"failed to remove temporary file for ptxas");
if (sys::fs::remove(PTXInputFilePath))
return Plugin::error("Failed to remove temporary file for ptxas");
return Plugin::error(ErrorCode::HOST_IO,
"failed to remove temporary file for ptxas");

return std::move(*BufferOrErr);
}
@@ -475,7 +489,8 @@ struct CUDADeviceTy : public GenericDeviceTy {
// Allocate and construct the CUDA kernel.
CUDAKernelTy *CUDAKernel = Plugin.allocate<CUDAKernelTy>();
if (!CUDAKernel)
return Plugin::error("Failed to allocate memory for CUDA kernel");
return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
"failed to allocate memory for CUDA kernel");

new (CUDAKernel) CUDAKernelTy(Name);

@@ -485,7 +500,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
/// Set the current context to this device's context.
Error setContext() override {
CUresult Res = cuCtxSetCurrent(Context);
return Plugin::check(Res, "Error in cuCtxSetCurrent: %s");
return Plugin::check(Res, "error in cuCtxSetCurrent: %s");
}

/// NVIDIA returns the product of the SM count and the number of warps that
@@ -579,7 +594,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
}

if (auto Err =
Plugin::check(Res, "Error in cuMemAlloc[Host|Managed]: %s")) {
Plugin::check(Res, "error in cuMemAlloc[Host|Managed]: %s")) {
REPORT("Failure to alloc memory: %s\n", toString(std::move(Err)).data());
return nullptr;
}
@@ -617,7 +632,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
}
}

if (auto Err = Plugin::check(Res, "Error in cuMemFree[Host]: %s")) {
if (auto Err = Plugin::check(Res, "error in cuMemFree[Host]: %s")) {
REPORT("Failure to free memory: %s\n", toString(std::move(Err)).data());
return OFFLOAD_FAIL;
}
@@ -637,7 +652,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
if (auto Err = CUDAStreamManager.returnResource(Stream))
return Err;

return Plugin::check(Res, "Error in cuStreamSynchronize: %s");
return Plugin::check(Res, "error in cuStreamSynchronize: %s");
}

/// CUDA support VA management
@@ -658,11 +673,13 @@ struct CUDADeviceTy : public GenericDeviceTy {
size_t Size = *RSize;

if (Size == 0)
return Plugin::error("Memory Map Size must be larger than 0");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"memory Map Size must be larger than 0");

// Check if we have already mapped this address
if (IHandle != DeviceMMaps.end())
return Plugin::error("Address already memory mapped");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"address already memory mapped");

CUmemAllocationProp Prop = {};
size_t Granularity = 0;
@@ -675,7 +692,8 @@ struct CUDADeviceTy : public GenericDeviceTy {
if (Size >= Free) {
*Addr = nullptr;
return Plugin::error(
"Cannot map memory size larger than the available device memory");
ErrorCode::OUT_OF_RESOURCES,
"cannot map memory size larger than the available device memory");
}

// currently NVidia only supports pinned device types
@@ -686,11 +704,12 @@ struct CUDADeviceTy : public GenericDeviceTy {
cuMemGetAllocationGranularity(&Granularity, &Prop,
CU_MEM_ALLOC_GRANULARITY_MINIMUM);
if (auto Err =
Plugin::check(Res, "Error in cuMemGetAllocationGranularity: %s"))
Plugin::check(Res, "error in cuMemGetAllocationGranularity: %s"))
return Err;

if (Granularity == 0)
return Plugin::error("Wrong device Page size");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"wrong device Page size");

// Ceil to page size.
Size = utils::roundUp(Size, Granularity);
@@ -698,16 +717,16 @@ struct CUDADeviceTy : public GenericDeviceTy {
// Create a handler of our allocation
CUmemGenericAllocationHandle AHandle;
Res = cuMemCreate(&AHandle, Size, &Prop, 0);
if (auto Err = Plugin::check(Res, "Error in cuMemCreate: %s"))
if (auto Err = Plugin::check(Res, "error in cuMemCreate: %s"))
return Err;

CUdeviceptr DevPtr = 0;
Res = cuMemAddressReserve(&DevPtr, Size, 0, DVAddr, 0);
if (auto Err = Plugin::check(Res, "Error in cuMemAddressReserve: %s"))
if (auto Err = Plugin::check(Res, "error in cuMemAddressReserve: %s"))
return Err;

Res = cuMemMap(DevPtr, Size, 0, AHandle, 0);
if (auto Err = Plugin::check(Res, "Error in cuMemMap: %s"))
if (auto Err = Plugin::check(Res, "error in cuMemMap: %s"))
return Err;

CUmemAccessDesc ADesc = {};
@@ -717,7 +736,7 @@ struct CUDADeviceTy : public GenericDeviceTy {

// Sets address
Res = cuMemSetAccess(DevPtr, Size, &ADesc, 1);
if (auto Err = Plugin::check(Res, "Error in cuMemSetAccess: %s"))
if (auto Err = Plugin::check(Res, "error in cuMemSetAccess: %s"))
return Err;

*Addr = reinterpret_cast<void *>(DevPtr);
@@ -732,24 +751,26 @@ struct CUDADeviceTy : public GenericDeviceTy {
auto IHandle = DeviceMMaps.find(DVAddr);
// Mapping does not exist
if (IHandle == DeviceMMaps.end()) {
return Plugin::error("Addr is not MemoryMapped");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"addr is not MemoryMapped");
}

if (IHandle == DeviceMMaps.end())
return Plugin::error("Addr is not MemoryMapped");
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"addr is not MemoryMapped");

CUmemGenericAllocationHandle &AllocHandle = IHandle->second;

CUresult Res = cuMemUnmap(DVAddr, Size);
if (auto Err = Plugin::check(Res, "Error in cuMemUnmap: %s"))
if (auto Err = Plugin::check(Res, "error in cuMemUnmap: %s"))
return Err;

Res = cuMemRelease(AllocHandle);
if (auto Err = Plugin::check(Res, "Error in cuMemRelease: %s"))
if (auto Err = Plugin::check(Res, "error in cuMemRelease: %s"))
return Err;

Res = cuMemAddressFree(DVAddr, Size);
if (auto Err = Plugin::check(Res, "Error in cuMemAddressFree: %s"))
if (auto Err = Plugin::check(Res, "error in cuMemAddressFree: %s"))
return Err;

DeviceMMaps.erase(IHandle);
@@ -772,7 +793,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
if (auto Err = CUDAStreamManager.returnResource(Stream))
return Err;

return Plugin::check(Res, "Error in cuStreamQuery: %s");
return Plugin::check(Res, "error in cuStreamQuery: %s");
}

Expected<void *> dataLockImpl(void *HstPtr, int64_t Size) override {
@@ -800,7 +821,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Err;

CUresult Res = cuMemcpyHtoDAsync((CUdeviceptr)TgtPtr, HstPtr, Size, Stream);
return Plugin::check(Res, "Error in cuMemcpyHtoDAsync: %s");
return Plugin::check(Res, "error in cuMemcpyHtoDAsync: %s");
}

/// Retrieve data from the device (device to host transfer).
@@ -814,7 +835,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Err;

CUresult Res = cuMemcpyDtoHAsync(HstPtr, (CUdeviceptr)TgtPtr, Size, Stream);
return Plugin::check(Res, "Error in cuMemcpyDtoHAsync: %s");
return Plugin::check(Res, "error in cuMemcpyDtoHAsync: %s");
}

/// Exchange data between two devices directly. We may use peer access if
@@ -874,7 +895,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Err;

CUresult Res = cuEventRecord(Event, Stream);
return Plugin::check(Res, "Error in cuEventRecord: %s");
return Plugin::check(Res, "error in cuEventRecord: %s");
}

/// Make the stream wait on the event.
@@ -890,14 +911,14 @@ struct CUDADeviceTy : public GenericDeviceTy {
// specific CUDA version, and defined as 0x0. In previous version, per CUDA
// API document, that argument has to be 0x0.
CUresult Res = cuStreamWaitEvent(Stream, Event, 0);
return Plugin::check(Res, "Error in cuStreamWaitEvent: %s");
return Plugin::check(Res, "error in cuStreamWaitEvent: %s");
}

/// Synchronize the current thread with the event.
Error syncEventImpl(void *EventPtr) override {
CUevent Event = reinterpret_cast<CUevent>(EventPtr);
CUresult Res = cuEventSynchronize(Event);
return Plugin::check(Res, "Error in cuEventSynchronize: %s");
return Plugin::check(Res, "error in cuEventSynchronize: %s");
}

/// Print information about the device.
@@ -1089,17 +1110,17 @@ struct CUDADeviceTy : public GenericDeviceTy {
}
Error getDeviceMemorySize(uint64_t &Value) override {
CUresult Res = cuDeviceTotalMem(&Value, Device);
return Plugin::check(Res, "Error in getDeviceMemorySize %s");
return Plugin::check(Res, "error in getDeviceMemorySize %s");
}

/// CUDA-specific functions for getting and setting context limits.
Error setCtxLimit(CUlimit Kind, uint64_t Value) {
CUresult Res = cuCtxSetLimit(Kind, Value);
return Plugin::check(Res, "Error in cuCtxSetLimit: %s");
return Plugin::check(Res, "error in cuCtxSetLimit: %s");
}
Error getCtxLimit(CUlimit Kind, uint64_t &Value) {
CUresult Res = cuCtxGetLimit(&Value, Kind);
return Plugin::check(Res, "Error in cuCtxGetLimit: %s");
return Plugin::check(Res, "error in cuCtxGetLimit: %s");
}

/// CUDA-specific function to get device attributes.
@@ -1107,7 +1128,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
// TODO: Warn if the new value is larger than the old.
CUresult Res =
cuDeviceGetAttribute((int *)&Value, (CUdevice_attribute)Kind, Device);
return Plugin::check(Res, "Error in cuDeviceGetAttribute: %s");
return Plugin::check(Res, "error in cuDeviceGetAttribute: %s");
}

CUresult getDeviceAttrRaw(uint32_t Kind, int &Value) {
@@ -1156,7 +1177,8 @@ private:

uint16_t Priority;
if (NameOrErr->rsplit('_').second.getAsInteger(10, Priority))
return Plugin::error("Invalid priority for constructor or destructor");
return Plugin::error(ErrorCode::INVALID_BINARY,
"invalid priority for constructor or destructor");

Funcs.emplace_back(*NameOrErr, Priority);
}
@@ -1169,7 +1191,8 @@ private:
void *Buffer =
allocate(Funcs.size() * sizeof(void *), nullptr, TARGET_ALLOC_DEVICE);
if (!Buffer)
return Plugin::error("Failed to allocate memory for global buffer");
return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
"failed to allocate memory for global buffer");

auto *GlobalPtrStart = reinterpret_cast<uintptr_t *>(Buffer);
auto *GlobalPtrStop = reinterpret_cast<uintptr_t *>(Buffer) + Funcs.size();
@@ -1217,7 +1240,8 @@ private:
AsyncInfoWrapper.finalize(Err);

if (free(Buffer, TARGET_ALLOC_DEVICE) != OFFLOAD_SUCCESS)
return Plugin::error("Failed to free memory for global buffer");
return Plugin::error(ErrorCode::UNKNOWN,
"failed to free memory for global buffer");

return Err;
}
@@ -1290,7 +1314,7 @@ Error CUDAKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
},
&GenericDevice.Plugin);

return Plugin::check(Res, "Error in cuLaunchKernel for '%s': %s", getName());
return Plugin::check(Res, "error in cuLaunchKernel for '%s': %s", getName());
}

/// Class implementing the CUDA-specific functionalities of the global handler.
@@ -1310,13 +1334,14 @@ public:
CUdeviceptr CUPtr;
CUresult Res =
cuModuleGetGlobal(&CUPtr, &CUSize, CUDAImage.getModule(), GlobalName);
if (auto Err = Plugin::check(Res, "Error in cuModuleGetGlobal for '%s': %s",
if (auto Err = Plugin::check(Res, "error in cuModuleGetGlobal for '%s': %s",
GlobalName))
return Err;

if (CUSize != DeviceGlobal.getSize())
return Plugin::error(
"Failed to load global '%s' due to size mismatch (%zu != %zu)",
ErrorCode::INVALID_BINARY,
"failed to load global '%s' due to size mismatch (%zu != %zu)",
GlobalName, CUSize, (size_t)DeviceGlobal.getSize());

DeviceGlobal.setPtr(reinterpret_cast<void *>(CUPtr));
@@ -1348,13 +1373,13 @@ struct CUDAPluginTy final : public GenericPluginTy {
return 0;
}

if (auto Err = Plugin::check(Res, "Error in cuInit: %s"))
if (auto Err = Plugin::check(Res, "error in cuInit: %s"))
return std::move(Err);

// Get the number of devices.
int NumDevices;
Res = cuDeviceGetCount(&NumDevices);
if (auto Err = Plugin::check(Res, "Error in cuDeviceGetCount: %s"))
if (auto Err = Plugin::check(Res, "error in cuDeviceGetCount: %s"))
return std::move(Err);

// Do not initialize if there are no devices.
@@ -1402,18 +1427,18 @@ struct CUDAPluginTy final : public GenericPluginTy {

CUdevice Device;
CUresult Res = cuDeviceGet(&Device, DeviceId);
if (auto Err = Plugin::check(Res, "Error in cuDeviceGet: %s"))
if (auto Err = Plugin::check(Res, "error in cuDeviceGet: %s"))
return std::move(Err);

int32_t Major, Minor;
Res = cuDeviceGetAttribute(
&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, Device);
if (auto Err = Plugin::check(Res, "Error in cuDeviceGetAttribute: %s"))
if (auto Err = Plugin::check(Res, "error in cuDeviceGetAttribute: %s"))
return std::move(Err);

Res = cuDeviceGetAttribute(
&Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, Device);
if (auto Err = Plugin::check(Res, "Error in cuDeviceGetAttribute: %s"))
if (auto Err = Plugin::check(Res, "error in cuDeviceGetAttribute: %s"))
return std::move(Err);

int32_t ImageMajor = SM / 10;
@@ -1465,7 +1490,7 @@ Error CUDADeviceTy::dataExchangeImpl(const void *SrcPtr,
CanAccessPeer = 0;
DP("Too many P2P so fall back to D2D memcpy");
} else if (auto Err =
Plugin::check(Res, "Error in cuCtxEnablePeerAccess: %s"))
Plugin::check(Res, "error in cuCtxEnablePeerAccess: %s"))
return Err;
}
PeerAccesses[DstDeviceId] = (CanAccessPeer)
@@ -1482,27 +1507,37 @@ Error CUDADeviceTy::dataExchangeImpl(const void *SrcPtr,
// TODO: Should we fallback to D2D if peer access fails?
Res = cuMemcpyPeerAsync(CUDstPtr, Context, CUSrcPtr, DstDevice.Context,
Size, Stream);
return Plugin::check(Res, "Error in cuMemcpyPeerAsync: %s");
return Plugin::check(Res, "error in cuMemcpyPeerAsync: %s");
}

// Fallback to D2D copy.
Res = cuMemcpyDtoDAsync(CUDstPtr, CUSrcPtr, Size, Stream);
return Plugin::check(Res, "Error in cuMemcpyDtoDAsync: %s");
return Plugin::check(Res, "error in cuMemcpyDtoDAsync: %s");
}

template <typename... ArgsTy>
static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) {
CUresult ResultCode = static_cast<CUresult>(Code);
if (ResultCode == CUDA_SUCCESS)
return Error::success();
return Plugin::success();

const char *Desc = "Unknown error";
CUresult Ret = cuGetErrorString(ResultCode, &Desc);
if (Ret != CUDA_SUCCESS)
REPORT("Unrecognized " GETNAME(TARGET_NAME) " error code %d\n", Code);

return createStringError<ArgsTy..., const char *>(inconvertibleErrorCode(),
ErrFmt, Args..., Desc);
// TODO: Add more entries to this switch
ErrorCode OffloadErrCode;
switch (ResultCode) {
case CUDA_ERROR_NOT_FOUND:
OffloadErrCode = ErrorCode::NOT_FOUND;
break;
default:
OffloadErrCode = ErrorCode::UNKNOWN;
}

// TODO: Create a map for CUDA error codes to Offload error codes
return Plugin::error(OffloadErrCode, ErrFmt, Args..., Desc);
}

} // namespace plugin

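// Illustrative sketch, not from the patch: the switch in Plugin::check above
// maps only CUDA_ERROR_NOT_FOUND so far, and the TODOs ask for more entries or
// a dedicated map. A hedged sketch of a slightly fuller mapping, reusing the
// CUresult values declared earlier in this diff; the pairings marked below are
// assumptions for illustration only.
switch (ResultCode) {
case CUDA_ERROR_NOT_FOUND:
  OffloadErrCode = ErrorCode::NOT_FOUND;
  break;
case CUDA_ERROR_INVALID_VALUE:  // assumed pairing
case CUDA_ERROR_INVALID_HANDLE: // assumed pairing
  OffloadErrCode = ErrorCode::INVALID_ARGUMENT;
  break;
default:
  OffloadErrCode = ErrorCode::UNKNOWN;
}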
@@ -56,6 +56,7 @@ struct GenELF64DeviceTy;
struct GenELF64PluginTy;

using llvm::sys::DynamicLibrary;
using namespace error;

/// Class implementing kernel functionalities for GenELF64.
struct GenELF64KernelTy : public GenericKernelTy {
@@ -74,7 +75,8 @@ struct GenELF64KernelTy : public GenericKernelTy {

// Check that the function pointer is valid.
if (!Global.getPtr())
return Plugin::error("Invalid function for kernel %s", getName());
return Plugin::error(ErrorCode::INVALID_BINARY,
"invalid function for kernel %s", getName());

// Save the function pointer.
Func = (void (*)())Global.getPtr();
@@ -102,7 +104,8 @@ struct GenELF64KernelTy : public GenericKernelTy {
ffi_status Status = ffi_prep_cif(&Cif, FFI_DEFAULT_ABI, KernelArgs.NumArgs,
&ffi_type_void, ArgTypesPtr);
if (Status != FFI_OK)
return Plugin::error("Error in ffi_prep_cif: %d", Status);
return Plugin::error(ErrorCode::UNKNOWN, "error in ffi_prep_cif: %d",
Status);

// Call the kernel function through libffi.
long Return;
@@ -155,7 +158,8 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
// Allocate and construct the kernel.
GenELF64KernelTy *GenELF64Kernel = Plugin.allocate<GenELF64KernelTy>();
if (!GenELF64Kernel)
return Plugin::error("Failed to allocate memory for GenELF64 kernel");
return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
"failed to allocate memory for GenELF64 kernel");

new (GenELF64Kernel) GenELF64KernelTy(Name);

@@ -176,24 +180,28 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
char TmpFileName[] = "/tmp/tmpfile_XXXXXX";
int TmpFileFd = mkstemp(TmpFileName);
if (TmpFileFd == -1)
return Plugin::error("Failed to create tmpfile for loading target image");
return Plugin::error(ErrorCode::HOST_IO,
"failed to create tmpfile for loading target image");

// Open the temporary file.
FILE *TmpFile = fdopen(TmpFileFd, "wb");
if (!TmpFile)
return Plugin::error("Failed to open tmpfile %s for loading target image",
return Plugin::error(ErrorCode::HOST_IO,
"failed to open tmpfile %s for loading target image",
TmpFileName);

// Write the image into the temporary file.
size_t Written = fwrite(Image->getStart(), Image->getSize(), 1, TmpFile);
if (Written != 1)
return Plugin::error("Failed to write target image to tmpfile %s",
return Plugin::error(ErrorCode::HOST_IO,
"failed to write target image to tmpfile %s",
TmpFileName);

// Close the temporary file.
int Ret = fclose(TmpFile);
if (Ret)
return Plugin::error("Failed to close tmpfile %s with the target image",
return Plugin::error(ErrorCode::HOST_IO,
"failed to close tmpfile %s with the target image",
TmpFileName);

// Load the temporary file as a dynamic library.
@@ -203,7 +211,8 @@ struct GenELF64DeviceTy : public GenericDeviceTy {

// Check if the loaded library is valid.
if (!DynLib.isValid())
return Plugin::error("Failed to load target image: %s", ErrMsg.c_str());
return Plugin::error(ErrorCode::INVALID_BINARY,
"failed to load target image: %s", ErrMsg.c_str());

// Save a reference of the image's dynamic library.
Image->setDynamicLibrary(DynLib);
@@ -272,7 +281,8 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
AsyncInfoWrapperTy &AsyncInfoWrapper) override {
// This function should never be called because the function
// GenELF64PluginTy::isDataExchangable() returns false.
return Plugin::error("dataExchangeImpl not supported");
return Plugin::error(ErrorCode::UNSUPPORTED,
"dataExchangeImpl not supported");
}

/// All functions are already synchronous. No need to do anything on this
@@ -289,12 +299,14 @@ struct GenELF64DeviceTy : public GenericDeviceTy {

/// This plugin does not support interoperability
Error initAsyncInfoImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) override {
return Plugin::error("initAsyncInfoImpl not supported");
return Plugin::error(ErrorCode::UNSUPPORTED,
"initAsyncInfoImpl not supported");
}

/// This plugin does not support interoperability
Error initDeviceInfoImpl(__tgt_device_info *DeviceInfo) override {
return Plugin::error("initDeviceInfoImpl not supported");
return Plugin::error(ErrorCode::UNSUPPORTED,
"initDeviceInfoImpl not supported");
}

/// This plugin does not support the event API. Do nothing without failing.
@@ -365,7 +377,8 @@ public:
// Get the address of the symbol.
void *Addr = DynLib.getAddressOfSymbol(GlobalName);
if (Addr == nullptr) {
return Plugin::error("Failed to load global '%s'", GlobalName);
return Plugin::error(ErrorCode::NOT_FOUND, "failed to load global '%s'",
GlobalName);
}

// Save the pointer to the symbol.
@@ -387,7 +400,7 @@ struct GenELF64PluginTy final : public GenericPluginTy {
/// Initialize the plugin and return the number of devices.
Expected<int32_t> initImpl() override {
#ifdef USES_DYNAMIC_FFI
if (auto Err = Plugin::check(ffi_init(), "Failed to initialize libffi"))
if (auto Err = Plugin::check(ffi_init(), "failed to initialize libffi"))
return std::move(Err);
#endif

@@ -455,10 +468,10 @@ struct GenELF64PluginTy final : public GenericPluginTy {
template <typename... ArgsTy>
static Error Plugin::check(int32_t Code, const char *ErrMsg, ArgsTy... Args) {
if (Code == 0)
return Error::success();
return Plugin::success();

return createStringError<ArgsTy..., const char *>(
inconvertibleErrorCode(), ErrMsg, Args..., std::to_string(Code).data());
return Plugin::error(ErrorCode::UNKNOWN, ErrMsg, Args...,
std::to_string(Code).data());
}

} // namespace plugin

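// Illustrative sketch, not from the patch: the host (GenELF64) plugin has no
// vendor error-code space to translate from, so Plugin::check above folds any
// non-zero return value into ErrorCode::UNKNOWN and appends the numeric code
// to the message. A minimal usage sketch under that assumption; the aligned
// allocation below is hypothetical and only stands in for an integer-returning
// host call.
void *Ptr = nullptr;
if (auto Err = Plugin::check(posix_memalign(&Ptr, /*Alignment=*/64, /*Size=*/1024),
                             "failed to allocate aligned host memory"))
  return std::move(Err);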
@@ -31,8 +31,8 @@ TEST_P(olGetKernelTest, InvalidNullKernelPointer) {
}

// Error code returning from plugin interface not yet supported
TEST_F(olGetKernelTest, DISABLED_InvalidKernelName) {
TEST_P(olGetKernelTest, InvalidKernelName) {
ol_kernel_handle_t Kernel = nullptr;
ASSERT_ERROR(OL_ERRC_INVALID_KERNEL_NAME,
ASSERT_ERROR(OL_ERRC_NOT_FOUND,
olGetKernel(Program, "invalid_kernel_name", &Kernel));
}