[libomptarget] Implement host plugin for amdgpu Replacement for D71384. Primary difference is inlining the dependency on atmi followed by extensive simplification and bugfixes. This is the latest version from https://github.com/ROCm-Developer-Tools/amd-llvm-project/tree/aomp12 with minor patches and a rename from hsa to amdgpu, on the basis that this can't be used by other implementations of hsa without additional work. This will not build unless the ROCM_DIR variable is passed so won't break other builds. That variable is used to locate two amdgpu specific libraries that ship as part of rocm: libhsakmt at https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface libhsa-runtime64 at https://github.com/RadeonOpenCompute/ROCR-Runtime These libraries build from source. The build scripts in those repos are for shared libraries, but can be adapted to statically link both into this plugin. There are caveats. - This works well enough to run various tests and benchmarks, and will be used to support the current clang bring up - It is adequately thread safe for the above but there will be races remaining - It is not stylistically correct for llvm, though has had clang-format run - It has suboptimal memory management and locking strategies - The debug printing / error handling is inconsistent I would like to contribute this pretty much as-is and then improve it in-tree. This would be advantageous because the aomp12 branch that was in use for fixing this codebase has just been joined with the amd internal rocm dev process. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D85742
137 lines
4.5 KiB
C++
137 lines
4.5 KiB
C++
/*===--------------------------------------------------------------------------
|
|
* ATMI (Asynchronous Task and Memory Interface)
|
|
*
|
|
* This file is distributed under the MIT License. See LICENSE.txt for details.
|
|
*===------------------------------------------------------------------------*/
|
|
#include "internal.h"
|
|
#include "rt.h"
|
|
|
|
#ifndef _GNU_SOURCE
|
|
#define _GNU_SOURCE
|
|
#endif
|
|
|
|
#include <errno.h>
|
|
#include <iostream>
|
|
#include <pthread.h>
|
|
#include <sched.h>
|
|
#include <stdio.h>
|
|
|
|
/*
|
|
* Helper functions
|
|
*/
|
|
/*
 * Translate an ATMI status code into the spelling of its enumerator.
 *
 * Returns a pointer to a string literal, so the caller must not free or
 * modify it. Any status other than the three handled below produces an
 * empty string.
 */
const char *get_atmi_error_string(atmi_status_t err) {
  if (err == ATMI_STATUS_SUCCESS)
    return "ATMI_STATUS_SUCCESS";
  if (err == ATMI_STATUS_UNKNOWN)
    return "ATMI_STATUS_UNKNOWN";
  if (err == ATMI_STATUS_ERROR)
    return "ATMI_STATUS_ERROR";
  /* Unrecognised status: nothing meaningful to print. */
  return "";
}
|
|
|
|
/*
 * Translate an HSA status code into the spelling of its enumerator.
 *
 * Returns a pointer to a string literal, so the caller must not free or
 * modify it. Status values that are not listed below (for example codes
 * added by HSA extensions or by newer runtime versions) produce an empty
 * string, matching get_atmi_error_string. Without the default case the
 * function would flow off its end for such values, which is undefined
 * behaviour for a function returning a value.
 */
const char *get_error_string(hsa_status_t err) {
  switch (err) {
  case HSA_STATUS_SUCCESS:
    return "HSA_STATUS_SUCCESS";
  case HSA_STATUS_INFO_BREAK:
    return "HSA_STATUS_INFO_BREAK";
  case HSA_STATUS_ERROR:
    return "HSA_STATUS_ERROR";
  case HSA_STATUS_ERROR_INVALID_ARGUMENT:
    return "HSA_STATUS_ERROR_INVALID_ARGUMENT";
  case HSA_STATUS_ERROR_INVALID_QUEUE_CREATION:
    return "HSA_STATUS_ERROR_INVALID_QUEUE_CREATION";
  case HSA_STATUS_ERROR_INVALID_ALLOCATION:
    return "HSA_STATUS_ERROR_INVALID_ALLOCATION";
  case HSA_STATUS_ERROR_INVALID_AGENT:
    return "HSA_STATUS_ERROR_INVALID_AGENT";
  case HSA_STATUS_ERROR_INVALID_REGION:
    return "HSA_STATUS_ERROR_INVALID_REGION";
  case HSA_STATUS_ERROR_INVALID_SIGNAL:
    return "HSA_STATUS_ERROR_INVALID_SIGNAL";
  case HSA_STATUS_ERROR_INVALID_QUEUE:
    return "HSA_STATUS_ERROR_INVALID_QUEUE";
  case HSA_STATUS_ERROR_OUT_OF_RESOURCES:
    return "HSA_STATUS_ERROR_OUT_OF_RESOURCES";
  case HSA_STATUS_ERROR_INVALID_PACKET_FORMAT:
    return "HSA_STATUS_ERROR_INVALID_PACKET_FORMAT";
  case HSA_STATUS_ERROR_RESOURCE_FREE:
    return "HSA_STATUS_ERROR_RESOURCE_FREE";
  case HSA_STATUS_ERROR_NOT_INITIALIZED:
    return "HSA_STATUS_ERROR_NOT_INITIALIZED";
  case HSA_STATUS_ERROR_REFCOUNT_OVERFLOW:
    return "HSA_STATUS_ERROR_REFCOUNT_OVERFLOW";
  case HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS:
    return "HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS";
  case HSA_STATUS_ERROR_INVALID_INDEX:
    return "HSA_STATUS_ERROR_INVALID_INDEX";
  case HSA_STATUS_ERROR_INVALID_ISA:
    return "HSA_STATUS_ERROR_INVALID_ISA";
  case HSA_STATUS_ERROR_INVALID_ISA_NAME:
    return "HSA_STATUS_ERROR_INVALID_ISA_NAME";
  case HSA_STATUS_ERROR_INVALID_CODE_OBJECT:
    return "HSA_STATUS_ERROR_INVALID_CODE_OBJECT";
  case HSA_STATUS_ERROR_INVALID_EXECUTABLE:
    return "HSA_STATUS_ERROR_INVALID_EXECUTABLE";
  case HSA_STATUS_ERROR_FROZEN_EXECUTABLE:
    return "HSA_STATUS_ERROR_FROZEN_EXECUTABLE";
  case HSA_STATUS_ERROR_INVALID_SYMBOL_NAME:
    return "HSA_STATUS_ERROR_INVALID_SYMBOL_NAME";
  case HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED:
    return "HSA_STATUS_ERROR_VARIABLE_ALREADY_DEFINED";
  case HSA_STATUS_ERROR_VARIABLE_UNDEFINED:
    return "HSA_STATUS_ERROR_VARIABLE_UNDEFINED";
  case HSA_STATUS_ERROR_EXCEPTION:
    return "HSA_STATUS_ERROR_EXCEPTION";
  default:
    /* Status code not known to this table. */
    return "";
  }
}
|
|
|
|
namespace core {
|
|
/*
|
|
* Environment variables
|
|
*/
|
|
void Environment::GetEnvAll() {
|
|
std::string var = GetEnv("ATMI_HELP");
|
|
if (!var.empty()) {
|
|
std::cout << "ATMI_MAX_HSA_QUEUE_SIZE : positive integer" << std::endl
|
|
<< "ATMI_MAX_KERNEL_TYPES : positive integer" << std::endl
|
|
<< "ATMI_DEVICE_GPU_WORKERS : positive integer" << std::endl
|
|
<< "ATMI_DEVICE_CPU_WORKERS : positive integer" << std::endl
|
|
<< "ATMI_DEBUG : 1 for printing out trace/debug info"
|
|
<< std::endl;
|
|
exit(0);
|
|
}
|
|
|
|
var = GetEnv("ATMI_MAX_HSA_QUEUE_SIZE");
|
|
if (!var.empty())
|
|
max_queue_size_ = std::stoi(var);
|
|
|
|
var = GetEnv("ATMI_MAX_KERNEL_TYPES");
|
|
if (!var.empty())
|
|
max_kernel_types_ = std::stoi(var);
|
|
|
|
/* TODO: If we get a good use case for device-specific worker count, we
|
|
* should explore it, but let us keep the worker count uniform for all
|
|
* devices of a type until that time
|
|
*/
|
|
var = GetEnv("ATMI_DEVICE_GPU_WORKERS");
|
|
if (!var.empty())
|
|
num_gpu_queues_ = std::stoi(var);
|
|
|
|
/* TODO: If we get a good use case for device-specific worker count, we
|
|
* should explore it, but let us keep the worker count uniform for all
|
|
* devices of a type until that time
|
|
*/
|
|
var = GetEnv("ATMI_DEVICE_CPU_WORKERS");
|
|
if (!var.empty())
|
|
num_cpu_queues_ = std::stoi(var);
|
|
|
|
var = GetEnv("ATMI_DEBUG");
|
|
if (!var.empty())
|
|
debug_mode_ = std::stoi(var);
|
|
}
|
|
} // namespace core
|