//===- PluginInterface.cpp - Target independent plugin device interface ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#include "PluginInterface.h"
#include "Debug.h"
#include "GlobalHandler.h"
#include "JIT.h"
#include "elf_common.h"
#include "omptarget.h"
#include "omptargetplugin.h"

#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Support/Error.h"

#include <cstdint>
#include <limits>

using namespace llvm;
using namespace omp;
using namespace target;
using namespace plugin;

GenericPluginTy *Plugin::SpecificPlugin = nullptr;

AsyncInfoWrapperTy::~AsyncInfoWrapperTy() {
  // If we used a local async info object we want synchronous behavior.
  // In that case, and assuming the current status code is OK, we will
  // synchronize explicitly when the object is deleted.
  if (AsyncInfoPtr == &LocalAsyncInfo && LocalAsyncInfo.Queue && !Err)
    Err = Device.synchronize(&LocalAsyncInfo);
}

Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
                            DeviceImageTy &Image) {
  PreferredNumThreads = getDefaultNumThreads(GenericDevice);
  MaxNumThreads = GenericDevice.getThreadLimit();
  DynamicMemorySize = GenericDevice.getDynamicMemorySize();

  return initImpl(GenericDevice, Image);
}

Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
                              ptrdiff_t *ArgOffsets, int32_t NumArgs,
                              uint64_t NumTeamsClause,
                              uint32_t ThreadLimitClause,
                              uint64_t LoopTripCount,
                              AsyncInfoWrapperTy &AsyncInfoWrapper) const {
  llvm::SmallVector<void *> Args;
  llvm::SmallVector<void *> Ptrs;

  void *KernelArgsPtr = prepareArgs(GenericDevice, ArgPtrs, ArgOffsets,
                                    NumArgs, Args, Ptrs, AsyncInfoWrapper);

  uint32_t NumThreads = getNumThreads(GenericDevice, ThreadLimitClause);
  uint64_t NumBlocks =
      getNumBlocks(GenericDevice, NumTeamsClause, LoopTripCount, NumThreads);

  INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(),
       "Launching kernel %s with %" PRIu64
       " blocks and %d threads in %s mode\n",
       getName(), NumBlocks, NumThreads, getExecutionModeName());

  return launchImpl(GenericDevice, NumThreads, NumBlocks, DynamicMemorySize,
                    NumArgs, KernelArgsPtr, AsyncInfoWrapper);
}

void *GenericKernelTy::prepareArgs(GenericDeviceTy &GenericDevice,
                                   void **ArgPtrs, ptrdiff_t *ArgOffsets,
                                   int32_t NumArgs,
                                   llvm::SmallVectorImpl<void *> &Args,
                                   llvm::SmallVectorImpl<void *> &Ptrs,
                                   AsyncInfoWrapperTy &AsyncInfoWrapper) const {
  Args.resize(NumArgs);
  Ptrs.resize(NumArgs);

  if (NumArgs == 0)
    return nullptr;

  for (int I = 0; I < NumArgs; ++I) {
    Ptrs[I] = (void *)((intptr_t)ArgPtrs[I] + ArgOffsets[I]);
    Args[I] = &Ptrs[I];
  }
  return &Args[0];
}

uint32_t GenericKernelTy::getNumThreads(GenericDeviceTy &GenericDevice,
                                        uint32_t ThreadLimitClause) const {
  if (ThreadLimitClause > 0 && isGenericMode())
    ThreadLimitClause += GenericDevice.getWarpSize();

  return std::min(MaxNumThreads, (ThreadLimitClause > 0) ? ThreadLimitClause
                                                         : PreferredNumThreads);
}
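
// Worked example (illustrative only, values assumed): with a
// `thread_limit(64)` clause on a generic-mode kernel and a warp size of 32,
// the clause value becomes 64 + 32 = 96 before being clamped to MaxNumThreads,
// presumably to account for the extra warp generic mode uses for the main
// thread. Without a clause, PreferredNumThreads is used instead.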

uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
                                       uint64_t NumTeamsClause,
                                       uint64_t LoopTripCount,
                                       uint32_t NumThreads) const {
  if (NumTeamsClause > 0) {
    // TODO: We need to honor any value and consequently allow more than the
    // block limit. For this we might need to start multiple kernels or let
    // the blocks start again until the requested number has been started.
    return std::min(NumTeamsClause, GenericDevice.getBlockLimit());
  }

  uint64_t TripCountNumBlocks = std::numeric_limits<uint64_t>::max();
  if (LoopTripCount > 0) {
    if (isSPMDMode()) {
      // We have a combined construct, i.e. `target teams distribute
      // parallel for [simd]`. We launch enough teams so that each thread
      // will execute one iteration of the loop; round up to the nearest
      // integer.
      TripCountNumBlocks = ((LoopTripCount - 1) / NumThreads) + 1;
    } else {
      assert((isGenericMode() || isGenericSPMDMode()) &&
             "Unexpected execution mode!");
      // If we reach this point, then we have a non-combined construct, i.e.
      // `teams distribute` with a nested `parallel for` and each team is
      // assigned one iteration of the `distribute` loop. E.g.:
      //
      // #pragma omp target teams distribute
      // for(...loop_tripcount...) {
      //   #pragma omp parallel for
      //   for(...) {}
      // }
      //
      // Threads within a team will execute the iterations of the `parallel`
      // loop.
      TripCountNumBlocks = LoopTripCount;
    }
  }
  // If the loops are long running we would rather reuse blocks than spawn too
  // many.
  uint64_t PreferredNumBlocks =
      std::min(TripCountNumBlocks, getDefaultNumBlocks(GenericDevice));
  return std::min(PreferredNumBlocks, GenericDevice.getBlockLimit());
}

GenericDeviceTy::GenericDeviceTy(int32_t DeviceId, int32_t NumDevices,
                                 const llvm::omp::GV &OMPGridValues)
    : MemoryManager(nullptr), OMP_TeamLimit("OMP_TEAM_LIMIT"),
      OMP_NumTeams("OMP_NUM_TEAMS"),
      OMP_TeamsThreadLimit("OMP_TEAMS_THREAD_LIMIT"),
      OMPX_DebugKind("LIBOMPTARGET_DEVICE_RTL_DEBUG"),
      OMPX_SharedMemorySize("LIBOMPTARGET_SHARED_MEMORY_SIZE"),
      // Do not initialize the following two envars since they depend on the
      // device initialization. These cannot be consulted until the device is
      // initialized correctly. We initialize them in GenericDeviceTy::init().
      OMPX_TargetStackSize(), OMPX_TargetHeapSize(),
      // By default, the initial number of streams and events is 32.
      OMPX_InitialNumStreams("LIBOMPTARGET_NUM_INITIAL_STREAMS", 32),
      OMPX_InitialNumEvents("LIBOMPTARGET_NUM_INITIAL_EVENTS", 32),
      DeviceId(DeviceId), GridValues(OMPGridValues),
      PeerAccesses(NumDevices, PeerAccessState::PENDING), PeerAccessesLock() {
  if (OMP_NumTeams > 0)
    GridValues.GV_Max_Teams =
        std::min(GridValues.GV_Max_Teams, uint32_t(OMP_NumTeams));

  if (OMP_TeamsThreadLimit > 0)
    GridValues.GV_Max_WG_Size =
        std::min(GridValues.GV_Max_WG_Size, uint32_t(OMP_TeamsThreadLimit));
}

Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
  if (auto Err = initImpl(Plugin))
    return Err;

  // Read and reinitialize the envars that depend on the device initialization.
  // Notice these two envars may change the stack size and heap size of the
  // device, so they need the device properly initialized.
  auto StackSizeEnvarOrErr = UInt64Envar::create(
      "LIBOMPTARGET_STACK_SIZE",
      [this](uint64_t &V) -> Error { return getDeviceStackSize(V); },
      [this](uint64_t V) -> Error { return setDeviceStackSize(V); });
  if (!StackSizeEnvarOrErr)
    return StackSizeEnvarOrErr.takeError();
  OMPX_TargetStackSize = std::move(*StackSizeEnvarOrErr);

  auto HeapSizeEnvarOrErr = UInt64Envar::create(
      "LIBOMPTARGET_HEAP_SIZE",
      [this](uint64_t &V) -> Error { return getDeviceHeapSize(V); },
      [this](uint64_t V) -> Error { return setDeviceHeapSize(V); });
  if (!HeapSizeEnvarOrErr)
    return HeapSizeEnvarOrErr.takeError();
  OMPX_TargetHeapSize = std::move(*HeapSizeEnvarOrErr);

  // Enable the memory manager if required.
  auto [ThresholdMM, EnableMM] = MemoryManagerTy::getSizeThresholdFromEnv();
  if (EnableMM)
    MemoryManager = new MemoryManagerTy(*this, ThresholdMM);

  return Plugin::success();
}
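
// Usage sketch (example values, not defaults from this file): the two envars
// created above can be set before launching the application, e.g.
//   LIBOMPTARGET_STACK_SIZE=8192 LIBOMPTARGET_HEAP_SIZE=268435456 ./app
// Each value is forwarded to setDeviceStackSize / setDeviceHeapSize, which is
// why these envars are only created here, after the device is initialized,
// rather than in the constructor.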

Error GenericDeviceTy::deinit() {
  // Delete the memory manager before deinitializing the device. Otherwise,
  // we may delete device allocations after the device is deinitialized.
  if (MemoryManager)
    delete MemoryManager;
  MemoryManager = nullptr;

  return deinitImpl();
}

Expected<__tgt_target_table *>
GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
                            const __tgt_device_image *TgtImage) {
  DP("Load data from image " DPxMOD "\n", DPxPTR(TgtImage->ImageStart));

  // Load the binary and allocate the image object. Use the next available id
  // for the image id, which is the number of previously loaded images.
  auto ImageOrErr = loadBinaryImpl(TgtImage, LoadedImages.size());
  if (!ImageOrErr)
    return ImageOrErr.takeError();

  DeviceImageTy *Image = *ImageOrErr;
  assert(Image != nullptr && "Invalid image");

  // Add the image to the list of loaded images.
  LoadedImages.push_back(Image);

  // Set up the device environment if needed.
  if (auto Err = setupDeviceEnvironment(Plugin, *Image))
    return std::move(Err);

  // Register all offload entries of the image.
  if (auto Err = registerOffloadEntries(*Image))
    return std::move(Err);

  // Return the pointer to the table of entries.
  return Image->getOffloadEntryTable();
}

Error GenericDeviceTy::setupDeviceEnvironment(GenericPluginTy &Plugin,
                                              DeviceImageTy &Image) {
  // There are some plugins that do not need this step.
  if (!shouldSetupDeviceEnvironment())
    return Plugin::success();

  DeviceEnvironmentTy DeviceEnvironment;
  DeviceEnvironment.DebugKind = OMPX_DebugKind;
  DeviceEnvironment.NumDevices = Plugin.getNumDevices();
  // TODO: The device ID used here is not the real device ID used by OpenMP.
  DeviceEnvironment.DeviceNum = DeviceId;
  DeviceEnvironment.DynamicMemSize = OMPX_SharedMemorySize;

  // Create the metainfo of the device environment global.
  GlobalTy DevEnvGlobal("omptarget_device_environment",
                        sizeof(DeviceEnvironmentTy), &DeviceEnvironment);

  // Write device environment values to the device.
  GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
  if (auto Err = GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal)) {
    DP("Missing symbol %s, continue execution anyway.\n",
       DevEnvGlobal.getName().data());
    consumeError(std::move(Err));
  }
  return Plugin::success();
}

Error GenericDeviceTy::registerOffloadEntries(DeviceImageTy &Image) {
  const __tgt_offload_entry *Begin = Image.getTgtImage()->EntriesBegin;
  const __tgt_offload_entry *End = Image.getTgtImage()->EntriesEnd;
  for (const __tgt_offload_entry *Entry = Begin; Entry != End; ++Entry) {
    // The host should always have something in the address to uniquely
    // identify the entry.
    if (!Entry->addr)
      return Plugin::error("Failure to register entry without address");

    __tgt_offload_entry DeviceEntry = {0};

    if (Entry->size) {
      if (auto Err = registerGlobalOffloadEntry(Image, *Entry, DeviceEntry))
        return Err;
    } else {
      if (auto Err = registerKernelOffloadEntry(Image, *Entry, DeviceEntry))
        return Err;
    }

    assert(DeviceEntry.addr && "Device addr of offload entry cannot be null");

    DP("Entry point " DPxMOD " maps to%s %s (" DPxMOD ")\n",
       DPxPTR(Entry - Begin), (Entry->size) ? " global" : "", Entry->name,
       DPxPTR(DeviceEntry.addr));
  }
  return Plugin::success();
}

Error GenericDeviceTy::registerGlobalOffloadEntry(
    DeviceImageTy &Image, const __tgt_offload_entry &GlobalEntry,
    __tgt_offload_entry &DeviceEntry) {

  GenericPluginTy &Plugin = Plugin::get();

  DeviceEntry = GlobalEntry;

  // Create a metadata object for the device global.
  GlobalTy DeviceGlobal(GlobalEntry.name, GlobalEntry.size);

  // Get the address of the global on the device.
  GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
  if (auto Err =
          GHandler.getGlobalMetadataFromDevice(*this, Image, DeviceGlobal))
    return Err;

  // Store the device address in the device entry.
  DeviceEntry.addr = DeviceGlobal.getPtr();
  assert(DeviceEntry.addr && "Invalid device global's address");

  // Note: In the current implementation declare target variables
  // can either be link or to. This means that once unified
  // memory is activated via the requires directive, the variable
  // can be used directly from the host in both cases.
  if (Plugin.getRequiresFlags() & OMP_REQ_UNIFIED_SHARED_MEMORY) {
    // If unified memory is present any target link or to variables
    // can access host addresses directly. There is no longer a
    // need for device copies.
    GlobalTy HostGlobal(GlobalEntry);
    if (auto Err = GHandler.writeGlobalToDevice(*this, Image, HostGlobal,
                                                DeviceGlobal))
      return Err;
  }

  // Add the device entry to the entry table.
  Image.getOffloadEntryTable().addEntry(DeviceEntry);

  return Plugin::success();
}

Error GenericDeviceTy::registerKernelOffloadEntry(
    DeviceImageTy &Image, const __tgt_offload_entry &KernelEntry,
    __tgt_offload_entry &DeviceEntry) {
  DeviceEntry = KernelEntry;

  // Create a kernel object.
  auto KernelOrErr = constructKernelEntry(KernelEntry, Image);
  if (!KernelOrErr)
    return KernelOrErr.takeError();

  GenericKernelTy *Kernel = *KernelOrErr;
  assert(Kernel != nullptr && "Invalid kernel");

  // Initialize the kernel.
  if (auto Err = Kernel->init(*this, Image))
    return Err;

  // Set the device entry address to the kernel address and store the entry
  // in the entry table.
  DeviceEntry.addr = (void *)Kernel;
  Image.getOffloadEntryTable().addEntry(DeviceEntry);

  return Plugin::success();
}

Expected<OMPTgtExecModeFlags>
GenericDeviceTy::getExecutionModeForKernel(StringRef Name,
                                           DeviceImageTy &Image) {
  // Create a metadata object for the exec mode global (auto-generated).
  StaticGlobalTy<llvm::omp::OMPTgtExecModeFlags> ExecModeGlobal(Name.data(),
                                                                "_exec_mode");

  // Retrieve execution mode for the kernel. This may fail since some kernels
  // may not have an execution mode.
  GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler();
  if (auto Err = GHandler.readGlobalFromImage(*this, Image, ExecModeGlobal)) {
    // Consume the error since it is acceptable to fail.
    [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
    DP("Failed to read execution mode for '%s': %s\n"
       "Using default SPMD (2) execution mode\n",
       Name.data(), ErrStr.data());

    return OMP_TGT_EXEC_MODE_SPMD;
  }

  // Check that the retrieved execution mode is valid.
  if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue()))
    return Plugin::error("Invalid execution mode %d for '%s'",
                         ExecModeGlobal.getValue(), Name.data());

  return ExecModeGlobal.getValue();
}

Error GenericDeviceTy::registerHostPinnedMemoryBuffer(const void *Buffer,
                                                      size_t Size) {
  std::lock_guard Lock(HostAllocationsMutex);

  auto Res = HostAllocations.insert({Buffer, Size});
  if (!Res.second)
    return Plugin::error("Registering an already registered pinned buffer");

  return Plugin::success();
}

Error GenericDeviceTy::unregisterHostPinnedMemoryBuffer(const void *Buffer) {
  std::lock_guard Lock(HostAllocationsMutex);

  size_t Erased = HostAllocations.erase(Buffer);
  if (!Erased)
    return Plugin::error("Cannot find a registered host pinned buffer");

  return Plugin::success();
}

Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo) {
  if (!AsyncInfo || !AsyncInfo->Queue)
    return Plugin::error("Invalid async info queue");

  return synchronizeImpl(*AsyncInfo);
}

Error GenericDeviceTy::queryAsync(__tgt_async_info *AsyncInfo) {
  if (!AsyncInfo || !AsyncInfo->Queue)
    return Plugin::error("Invalid async info queue");

  return queryAsyncImpl(*AsyncInfo);
}

Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
                                            TargetAllocTy Kind) {
  void *Alloc = nullptr;

  switch (Kind) {
  case TARGET_ALLOC_DEFAULT:
  case TARGET_ALLOC_DEVICE:
    if (MemoryManager) {
      Alloc = MemoryManager->allocate(Size, HostPtr);
      if (!Alloc)
        return Plugin::error("Failed to allocate from memory manager");
      break;
    }
    [[fallthrough]];
  case TARGET_ALLOC_HOST:
  case TARGET_ALLOC_SHARED:
    Alloc = allocate(Size, HostPtr, Kind);
    if (!Alloc)
      return Plugin::error("Failed to allocate from device allocator");
  }

  // Report error if the memory manager or the device allocator did not return
  // any memory buffer.
  if (!Alloc)
    return Plugin::error("Invalid target data allocation kind or requested "
                         "allocator not implemented yet");

  // Register allocated buffer as pinned memory if the type is host memory.
  if (Kind == TARGET_ALLOC_HOST)
    if (auto Err = registerHostPinnedMemoryBuffer(Alloc, Size))
      return Err;

  return Alloc;
}
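
// Usage sketch (caller side, simplified): libomptarget reaches this method
// through the entry point defined further below, e.g.
//   void *DevPtr =
//       __tgt_rtl_data_alloc(DeviceId, Size, HostPtr, TARGET_ALLOC_DEFAULT);
// Default and device allocations may be served by the memory manager, while
// TARGET_ALLOC_HOST buffers bypass it and are additionally tracked as pinned
// host memory via registerHostPinnedMemoryBuffer().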

Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
  int Res;
  if (MemoryManager)
    Res = MemoryManager->free(TgtPtr);
  else
    Res = free(TgtPtr, Kind);

  if (Res)
    return Plugin::error("Failure to deallocate device pointer %p", TgtPtr);

  // Unregister deallocated pinned memory buffer if the type is host memory.
  if (Kind == TARGET_ALLOC_HOST)
    if (auto Err = unregisterHostPinnedMemoryBuffer(TgtPtr))
      return Err;

  return Plugin::success();
}

Error GenericDeviceTy::dataSubmit(void *TgtPtr, const void *HstPtr,
                                  int64_t Size, __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);
  Err = dataSubmitImpl(TgtPtr, HstPtr, Size, AsyncInfoWrapper);
  return Err;
}

Error GenericDeviceTy::dataRetrieve(void *HstPtr, const void *TgtPtr,
                                    int64_t Size, __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);
  Err = dataRetrieveImpl(HstPtr, TgtPtr, Size, AsyncInfoWrapper);
  return Err;
}

Error GenericDeviceTy::dataExchange(const void *SrcPtr, GenericDeviceTy &DstDev,
                                    void *DstPtr, int64_t Size,
                                    __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);
  Err = dataExchangeImpl(SrcPtr, DstDev, DstPtr, Size, AsyncInfoWrapper);
  return Err;
}

Error GenericDeviceTy::runTargetTeamRegion(
    void *EntryPtr, void **ArgPtrs, ptrdiff_t *ArgOffsets, int32_t NumArgs,
    uint64_t NumTeamsClause, uint32_t ThreadLimitClause, uint64_t LoopTripCount,
    __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);

  GenericKernelTy &GenericKernel =
      *reinterpret_cast<GenericKernelTy *>(EntryPtr);

  Err = GenericKernel.launch(*this, ArgPtrs, ArgOffsets, NumArgs,
                             NumTeamsClause, ThreadLimitClause, LoopTripCount,
                             AsyncInfoWrapper);
  return Err;
}

Error GenericDeviceTy::initAsyncInfo(__tgt_async_info **AsyncInfoPtr) {
  assert(AsyncInfoPtr && "Invalid async info");

  *AsyncInfoPtr = new __tgt_async_info();

  auto Err = Plugin::success();
  AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, *AsyncInfoPtr);
  Err = initAsyncInfoImpl(AsyncInfoWrapper);
  return Err;
}

Error GenericDeviceTy::initDeviceInfo(__tgt_device_info *DeviceInfo) {
  assert(DeviceInfo && "Invalid device info");

  return initDeviceInfoImpl(DeviceInfo);
}

Error GenericDeviceTy::printInfo() {
  // TODO: Print generic information here
  return printInfoImpl();
}

Error GenericDeviceTy::createEvent(void **EventPtrStorage) {
  return createEventImpl(EventPtrStorage);
}

Error GenericDeviceTy::destroyEvent(void *EventPtr) {
  return destroyEventImpl(EventPtr);
}

Error GenericDeviceTy::recordEvent(void *EventPtr,
                                   __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);
  Err = recordEventImpl(EventPtr, AsyncInfoWrapper);
  return Err;
}

Error GenericDeviceTy::waitEvent(void *EventPtr, __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);
  Err = waitEventImpl(EventPtr, AsyncInfoWrapper);
  return Err;
}

Error GenericDeviceTy::syncEvent(void *EventPtr) {
  return syncEventImpl(EventPtr);
}
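
// Event usage sketch (host-runtime side, simplified): a typical sequence over
// the corresponding entry points defined further below is
//   void *Event;
//   __tgt_rtl_create_event(DeviceId, &Event);
//   __tgt_rtl_record_event(DeviceId, Event, AsyncInfo);    // mark a point
//   __tgt_rtl_wait_event(DeviceId, Event, OtherAsyncInfo); // order queues
//   __tgt_rtl_sync_event(DeviceId, Event);                 // block the host
//   __tgt_rtl_destroy_event(DeviceId, Event);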

Error GenericPluginTy::init() {
  auto NumDevicesOrErr = initImpl();
  if (!NumDevicesOrErr)
    return NumDevicesOrErr.takeError();

  NumDevices = *NumDevicesOrErr;
  if (NumDevices == 0)
    return Plugin::success();

  assert(Devices.size() == 0 && "Plugin already initialized");
  Devices.resize(NumDevices, nullptr);

  GlobalHandler = Plugin::createGlobalHandler();
  assert(GlobalHandler && "Invalid global handler");

  return Plugin::success();
}

Error GenericPluginTy::deinit() {
  // There is no global handler if no device is available.
  if (GlobalHandler)
    delete GlobalHandler;

  // Deinitialize all active devices.
  for (int32_t DeviceId = 0; DeviceId < NumDevices; ++DeviceId) {
    if (Devices[DeviceId]) {
      if (auto Err = deinitDevice(DeviceId))
        return Err;
    }
    assert(!Devices[DeviceId] && "Device was not deinitialized");
  }

  // Perform last deinitializations on the plugin.
  return deinitImpl();
}

Error GenericPluginTy::initDevice(int32_t DeviceId) {
  assert(!Devices[DeviceId] && "Device already initialized");

  // Create the device and save the reference.
  GenericDeviceTy *Device = Plugin::createDevice(DeviceId, NumDevices);
  assert(Device && "Invalid device");

  // Save the device reference into the list.
  Devices[DeviceId] = Device;

  // Initialize the device and its resources.
  return Device->init(*this);
}

Error GenericPluginTy::deinitDevice(int32_t DeviceId) {
  // The device may already be deinitialized.
  if (Devices[DeviceId] == nullptr)
    return Plugin::success();

  // Deinitialize the device and release its resources.
  if (auto Err = Devices[DeviceId]->deinit())
    return Err;

  // Delete the device and invalidate its reference.
  delete Devices[DeviceId];
  Devices[DeviceId] = nullptr;

  return Plugin::success();
}

/// Exposed library API functions, basically wrappers around the
/// GenericDeviceTy functionality with the same name. All non-async functions
/// are redirected to the async versions right away with a NULL AsyncInfoPtr.
#ifdef __cplusplus
extern "C" {
#endif

int32_t __tgt_rtl_init_plugin() {
  auto Err = Plugin::initIfNeeded();
  if (Err)
    REPORT("Failure to initialize plugin " GETNAME(TARGET_NAME) ": %s\n",
           toString(std::move(Err)).data());

  return (bool)Err;
}

int32_t __tgt_rtl_deinit_plugin() {
  auto Err = Plugin::deinitIfNeeded();
  if (Err)
    REPORT("Failure to deinitialize plugin " GETNAME(TARGET_NAME) ": %s\n",
           toString(std::move(Err)).data());

  return (bool)Err;
}

int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *TgtImage) {
  if (!Plugin::isActive())
    return false;

  if (elf_check_machine(TgtImage, Plugin::get().getMagicElfBits()))
    return true;

  return jit::checkBitcodeImage(TgtImage, Plugin::get().getTripleArch());
}

int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *TgtImage,
                                       __tgt_image_info *Info) {
  if (!Plugin::isActive())
    return false;

  if (!__tgt_rtl_is_valid_binary(TgtImage))
    return false;

  // A subarchitecture was not specified. Assume it is compatible.
  if (!Info->Arch)
    return true;

  // Check the compatibility with all the available devices. Notice the
  // devices may not be initialized yet.
  auto CompatibleOrErr = Plugin::get().isImageCompatible(Info);
  if (!CompatibleOrErr) {
    // This error should not abort the execution, so we just inform the user
    // through the debug system.
    std::string ErrString = toString(CompatibleOrErr.takeError());
    DP("Failure to check whether image %p is valid: %s\n", TgtImage,
       ErrString.data());
    return false;
  }

  bool Compatible = *CompatibleOrErr;
  DP("Image is %scompatible with current environment: %s\n",
     (Compatible) ? "" : "not ", Info->Arch);

  return Compatible;
}

int32_t __tgt_rtl_supports_empty_images() {
  return Plugin::get().supportsEmptyImages();
}

int32_t __tgt_rtl_init_device(int32_t DeviceId) {
  auto Err = Plugin::get().initDevice(DeviceId);
  if (Err)
    REPORT("Failure to initialize device %d: %s\n", DeviceId,
           toString(std::move(Err)).data());

  return (bool)Err;
}

int32_t __tgt_rtl_deinit_device(int32_t DeviceId) {
  auto Err = Plugin::get().deinitDevice(DeviceId);
  if (Err)
    REPORT("Failure to deinitialize device %d: %s\n", DeviceId,
           toString(std::move(Err)).data());

  return (bool)Err;
}

int32_t __tgt_rtl_number_of_devices() { return Plugin::get().getNumDevices(); }

int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
  Plugin::get().setRequiresFlag(RequiresFlags);
  return RequiresFlags;
}

int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDeviceId,
                                      int32_t DstDeviceId) {
  return Plugin::get().isDataExchangable(SrcDeviceId, DstDeviceId);
}

__tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
                                          __tgt_device_image *TgtImage) {
  GenericPluginTy &Plugin = Plugin::get();
  GenericDeviceTy &Device = Plugin.getDevice(DeviceId);

  // If it is a bitcode image, we have to JIT the binary image before loading
  // it to the device.
  {
    // TODO: Move this (at least the environment variable) into JIT.h.
    UInt32Envar JITOptLevel("LIBOMPTARGET_JIT_OPT_LEVEL", 3);
    Triple::ArchType TA = Plugin.getTripleArch();
    std::string Arch = Device.getArch();

    jit::PostProcessingFn PostProcessing =
        [&Device](std::unique_ptr<MemoryBuffer> MB)
        -> Expected<std::unique_ptr<MemoryBuffer>> {
      return Device.doJITPostProcessing(std::move(MB));
    };

    if (jit::checkBitcodeImage(TgtImage, TA)) {
      auto TgtImageOrErr =
          jit::compile(TgtImage, TA, Arch, JITOptLevel, PostProcessing);
      if (!TgtImageOrErr) {
        auto Err = TgtImageOrErr.takeError();
        REPORT("Failure to jit binary image from bitcode image %p on device "
               "%d: %s\n",
               TgtImage, DeviceId, toString(std::move(Err)).data());
        return nullptr;
      }

      TgtImage = *TgtImageOrErr;
    }
  }

  auto TableOrErr = Device.loadBinary(Plugin, TgtImage);
  if (!TableOrErr) {
    auto Err = TableOrErr.takeError();
    REPORT("Failure to load binary image %p on device %d: %s\n", TgtImage,
           DeviceId, toString(std::move(Err)).data());
    return nullptr;
  }

  __tgt_target_table *Table = *TableOrErr;
  assert(Table != nullptr && "Invalid table");

  return Table;
}
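
// Usage sketch (example value): the JIT path above is only taken when the
// image is LLVM bitcode rather than a prebuilt device ELF. Its optimization
// level can be overridden at run time, e.g.
//   LIBOMPTARGET_JIT_OPT_LEVEL=2 ./app
// which replaces the default of 3 given to the UInt32Envar above.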

void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr,
                           int32_t Kind) {
  auto AllocOrErr = Plugin::get().getDevice(DeviceId).dataAlloc(
      Size, HostPtr, (TargetAllocTy)Kind);
  if (!AllocOrErr) {
    auto Err = AllocOrErr.takeError();
    REPORT("Failure to allocate device memory: %s\n",
           toString(std::move(Err)).data());
    return nullptr;
  }
  assert(*AllocOrErr && "Null pointer upon successful allocation");

  return *AllocOrErr;
}

int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr, int32_t Kind) {
  auto Err =
      Plugin::get().getDevice(DeviceId).dataDelete(TgtPtr, (TargetAllocTy)Kind);
  if (Err)
    REPORT("Failure to deallocate device pointer %p: %s\n", TgtPtr,
           toString(std::move(Err)).data());

  return (bool)Err;
}

int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
                              int64_t Size) {
  return __tgt_rtl_data_submit_async(DeviceId, TgtPtr, HstPtr, Size,
                                     /* AsyncInfoPtr */ nullptr);
}

int32_t __tgt_rtl_data_submit_async(int32_t DeviceId, void *TgtPtr,
                                    void *HstPtr, int64_t Size,
                                    __tgt_async_info *AsyncInfoPtr) {
  auto Err = Plugin::get().getDevice(DeviceId).dataSubmit(TgtPtr, HstPtr, Size,
                                                          AsyncInfoPtr);
  if (Err)
    REPORT("Failure to copy data from host to device. Pointers: host "
           "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n",
           DPxPTR(HstPtr), DPxPTR(TgtPtr), Size,
           toString(std::move(Err)).data());

  return (bool)Err;
}

int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
                                int64_t Size) {
  return __tgt_rtl_data_retrieve_async(DeviceId, HstPtr, TgtPtr, Size,
                                       /* AsyncInfoPtr */ nullptr);
}

int32_t __tgt_rtl_data_retrieve_async(int32_t DeviceId, void *HstPtr,
                                      void *TgtPtr, int64_t Size,
                                      __tgt_async_info *AsyncInfoPtr) {
  auto Err = Plugin::get().getDevice(DeviceId).dataRetrieve(HstPtr, TgtPtr,
                                                            Size, AsyncInfoPtr);
  if (Err)
    REPORT("Failure to copy data from device to host. Pointers: host "
           "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n",
           DPxPTR(HstPtr), DPxPTR(TgtPtr), Size,
           toString(std::move(Err)).data());

  return (bool)Err;
}

int32_t __tgt_rtl_data_exchange(int32_t SrcDeviceId, void *SrcPtr,
                                int32_t DstDeviceId, void *DstPtr,
                                int64_t Size) {
  return __tgt_rtl_data_exchange_async(SrcDeviceId, SrcPtr, DstDeviceId, DstPtr,
                                       Size, /* AsyncInfoPtr */ nullptr);
}

int32_t __tgt_rtl_data_exchange_async(int32_t SrcDeviceId, void *SrcPtr,
                                      int DstDeviceId, void *DstPtr,
                                      int64_t Size,
                                      __tgt_async_info *AsyncInfo) {
  GenericDeviceTy &SrcDevice = Plugin::get().getDevice(SrcDeviceId);
  GenericDeviceTy &DstDevice = Plugin::get().getDevice(DstDeviceId);
  auto Err = SrcDevice.dataExchange(SrcPtr, DstDevice, DstPtr, Size, AsyncInfo);
  if (Err)
    REPORT("Failure to copy data from device (%d) to device (%d). Pointers: "
           "src = " DPxMOD ", dst = " DPxMOD ", size = %" PRId64 ": %s\n",
           SrcDeviceId, DstDeviceId, DPxPTR(SrcPtr), DPxPTR(DstPtr), Size,
           toString(std::move(Err)).data());

  return (bool)Err;
}

int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr,
                                         void **TgtArgs, ptrdiff_t *TgtOffsets,
                                         int32_t NumArgs, int32_t NumTeams,
                                         int32_t ThreadLimit,
                                         uint64_t LoopTripCount) {
  return __tgt_rtl_run_target_team_region_async(DeviceId, TgtEntryPtr, TgtArgs,
                                                TgtOffsets, NumArgs, NumTeams,
                                                ThreadLimit, LoopTripCount,
                                                /* AsyncInfoPtr */ nullptr);
}

int32_t __tgt_rtl_run_target_team_region_async(
    int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
    int32_t NumArgs, int32_t NumTeams, int32_t ThreadLimit,
    uint64_t LoopTripCount, __tgt_async_info *AsyncInfoPtr) {
  auto Err = Plugin::get().getDevice(DeviceId).runTargetTeamRegion(
      TgtEntryPtr, TgtArgs, TgtOffsets, NumArgs, NumTeams, ThreadLimit,
      LoopTripCount, AsyncInfoPtr);
  if (Err)
    REPORT("Failure to run target region " DPxMOD " in device %d: %s\n",
           DPxPTR(TgtEntryPtr), DeviceId, toString(std::move(Err)).data());

  return (bool)Err;
}

int32_t __tgt_rtl_synchronize(int32_t DeviceId,
                              __tgt_async_info *AsyncInfoPtr) {
  auto Err = Plugin::get().getDevice(DeviceId).synchronize(AsyncInfoPtr);
  if (Err)
    REPORT("Failure to synchronize stream %p: %s\n", AsyncInfoPtr->Queue,
           toString(std::move(Err)).data());

  return (bool)Err;
}

int32_t __tgt_rtl_query_async(int32_t DeviceId,
                              __tgt_async_info *AsyncInfoPtr) {
  auto Err = Plugin::get().getDevice(DeviceId).queryAsync(AsyncInfoPtr);
  if (Err)
    REPORT("Failure to query stream %p: %s\n", AsyncInfoPtr->Queue,
           toString(std::move(Err)).data());

  return (bool)Err;
}

int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr,
                                    void **TgtArgs, ptrdiff_t *TgtOffsets,
                                    int32_t NumArgs) {
  return __tgt_rtl_run_target_region_async(DeviceId, TgtEntryPtr, TgtArgs,
                                           TgtOffsets, NumArgs,
                                           /* AsyncInfoPtr */ nullptr);
}

int32_t __tgt_rtl_run_target_region_async(int32_t DeviceId, void *TgtEntryPtr,
                                          void **TgtArgs, ptrdiff_t *TgtOffsets,
                                          int32_t NumArgs,
                                          __tgt_async_info *AsyncInfoPtr) {
  return __tgt_rtl_run_target_team_region_async(
      DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, NumArgs, /* team num */ 1,
      /* thread limit */ 1, /* loop tripcount */ 0, AsyncInfoPtr);
}

void __tgt_rtl_print_device_info(int32_t DeviceId) {
  if (auto Err = Plugin::get().getDevice(DeviceId).printInfo())
    REPORT("Failure to print device %d info: %s\n", DeviceId,
           toString(std::move(Err)).data());
}

int32_t __tgt_rtl_create_event(int32_t DeviceId, void **EventPtr) {
  auto Err = Plugin::get().getDevice(DeviceId).createEvent(EventPtr);
  if (Err)
    REPORT("Failure to create event: %s\n", toString(std::move(Err)).data());

  return (bool)Err;
}

int32_t __tgt_rtl_record_event(int32_t DeviceId, void *EventPtr,
                               __tgt_async_info *AsyncInfoPtr) {
  auto Err =
      Plugin::get().getDevice(DeviceId).recordEvent(EventPtr, AsyncInfoPtr);
  if (Err)
    REPORT("Failure to record event %p: %s\n", EventPtr,
           toString(std::move(Err)).data());

  return (bool)Err;
}

int32_t __tgt_rtl_wait_event(int32_t DeviceId, void *EventPtr,
                             __tgt_async_info *AsyncInfoPtr) {
  auto Err =
      Plugin::get().getDevice(DeviceId).waitEvent(EventPtr, AsyncInfoPtr);
  if (Err)
    REPORT("Failure to wait event %p: %s\n", EventPtr,
           toString(std::move(Err)).data());

  return (bool)Err;
}

int32_t __tgt_rtl_sync_event(int32_t DeviceId, void *EventPtr) {
  auto Err = Plugin::get().getDevice(DeviceId).syncEvent(EventPtr);
  if (Err)
    REPORT("Failure to synchronize event %p: %s\n", EventPtr,
           toString(std::move(Err)).data());

  return (bool)Err;
}

int32_t __tgt_rtl_destroy_event(int32_t DeviceId, void *EventPtr) {
  auto Err = Plugin::get().getDevice(DeviceId).destroyEvent(EventPtr);
  if (Err)
    REPORT("Failure to destroy event %p: %s\n", EventPtr,
           toString(std::move(Err)).data());

  return (bool)Err;
}

void __tgt_rtl_set_info_flag(uint32_t NewInfoLevel) {
  std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal();
  InfoLevel.store(NewInfoLevel);
}

int32_t __tgt_rtl_init_async_info(int32_t DeviceId,
                                  __tgt_async_info **AsyncInfoPtr) {
  assert(AsyncInfoPtr && "Invalid async info");

  auto Err = Plugin::get().getDevice(DeviceId).initAsyncInfo(AsyncInfoPtr);
  if (Err)
    REPORT("Failure to initialize async info at " DPxMOD " on device %d: %s\n",
           DPxPTR(*AsyncInfoPtr), DeviceId, toString(std::move(Err)).data());

  return (bool)Err;
}

int32_t __tgt_rtl_init_device_info(int32_t DeviceId,
                                   __tgt_device_info *DeviceInfo,
                                   const char **ErrStr) {
  *ErrStr = "";

  auto Err = Plugin::get().getDevice(DeviceId).initDeviceInfo(DeviceInfo);
  if (Err)
    REPORT("Failure to initialize device info at " DPxMOD " on device %d: "
           "%s\n",
           DPxPTR(DeviceInfo), DeviceId, toString(std::move(Err)).data());

  return (bool)Err;
}

#ifdef __cplusplus
}
#endif