Files
clang-p2996/flang-rt/lib/runtime/work-queue.cpp
Peter Klausler 163c67ad3d [flang][runtime] Replace recursion with iterative work queue (#137727)
Recursion, both direct and indirect, prevents accurate stack size
calculation at link time for GPU device code. Restructure these
recursive (often mutually so) routines in the Fortran runtime with new
implementations based on an iterative work queue with
suspendable/resumable work tickets: Assign, Initialize, initializeClone,
Finalize, and Destroy.

Default derived type I/O is also recursive, but already disabled. It can
be added to this new framework later if the overall approach succeeds.

Note that derived type FINAL subroutine calls, defined assignments, and
defined I/O procedures all perform callbacks into user code, which may
well reenter the runtime library. This kind of recursion is not handled
by this change, although it may be possible to do so in the future using
thread-local work queues.

The effects of this restructuring on CPU performance are yet to be
measured.
2025-06-10 14:44:19 -07:00

162 lines
4.2 KiB
C++

//===-- lib/runtime/work-queue.cpp ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "flang-rt/runtime/work-queue.h"
#include "flang-rt/runtime/environment.h"
#include "flang-rt/runtime/memory.h"
#include "flang-rt/runtime/type-info.h"
#include "flang/Common/visit.h"
namespace Fortran::runtime {
#if !defined(RT_DEVICE_COMPILATION)
// FLANG_RT_DEBUG code is disabled when false.
static constexpr bool enableDebugOutput{false};
#endif
RT_OFFLOAD_API_GROUP_BEGIN
RT_API_ATTRS Componentwise::Componentwise(const typeInfo::DerivedType &derived)
: derived_{derived}, components_{derived_.component().Elements()} {
GetComponent();
}
RT_API_ATTRS void Componentwise::GetComponent() {
if (IsComplete()) {
component_ = nullptr;
} else {
const Descriptor &componentDesc{derived_.component()};
component_ = componentDesc.ZeroBasedIndexedElement<typeInfo::Component>(
componentAt_);
}
}
RT_API_ATTRS int Ticket::Continue(WorkQueue &workQueue) {
if (!begun) {
begun = true;
return common::visit(
[&workQueue](
auto &specificTicket) { return specificTicket.Begin(workQueue); },
u);
} else {
return common::visit(
[&workQueue](auto &specificTicket) {
return specificTicket.Continue(workQueue);
},
u);
}
}
RT_API_ATTRS WorkQueue::~WorkQueue() {
if (last_) {
if ((last_->next = firstFree_)) {
last_->next->previous = last_;
}
firstFree_ = first_;
first_ = last_ = nullptr;
}
while (firstFree_) {
TicketList *next{firstFree_->next};
if (!firstFree_->isStatic) {
FreeMemory(firstFree_);
}
firstFree_ = next;
}
}
RT_API_ATTRS Ticket &WorkQueue::StartTicket() {
if (!firstFree_) {
void *p{AllocateMemoryOrCrash(terminator_, sizeof(TicketList))};
firstFree_ = new (p) TicketList;
firstFree_->isStatic = false;
}
TicketList *newTicket{firstFree_};
if ((firstFree_ = newTicket->next)) {
firstFree_->previous = nullptr;
}
TicketList *after{insertAfter_ ? insertAfter_->next : nullptr};
if ((newTicket->previous = insertAfter_ ? insertAfter_ : last_)) {
newTicket->previous->next = newTicket;
} else {
first_ = newTicket;
}
if ((newTicket->next = after)) {
after->previous = newTicket;
} else {
last_ = newTicket;
}
newTicket->ticket.begun = false;
#if !defined(RT_DEVICE_COMPILATION)
if (enableDebugOutput &&
(executionEnvironment.internalDebugging &
ExecutionEnvironment::WorkQueue)) {
std::fprintf(stderr, "WQ: new ticket\n");
}
#endif
return newTicket->ticket;
}
RT_API_ATTRS int WorkQueue::Run() {
while (last_) {
TicketList *at{last_};
insertAfter_ = last_;
#if !defined(RT_DEVICE_COMPILATION)
if (enableDebugOutput &&
(executionEnvironment.internalDebugging &
ExecutionEnvironment::WorkQueue)) {
std::fprintf(stderr, "WQ: %zd %s\n", at->ticket.u.index(),
at->ticket.begun ? "Continue" : "Begin");
}
#endif
int stat{at->ticket.Continue(*this)};
#if !defined(RT_DEVICE_COMPILATION)
if (enableDebugOutput &&
(executionEnvironment.internalDebugging &
ExecutionEnvironment::WorkQueue)) {
std::fprintf(stderr, "WQ: ... stat %d\n", stat);
}
#endif
insertAfter_ = nullptr;
if (stat == StatOk) {
if (at->previous) {
at->previous->next = at->next;
} else {
first_ = at->next;
}
if (at->next) {
at->next->previous = at->previous;
} else {
last_ = at->previous;
}
if ((at->next = firstFree_)) {
at->next->previous = at;
}
at->previous = nullptr;
firstFree_ = at;
} else if (stat != StatContinue) {
Stop();
return stat;
}
}
return StatOk;
}
RT_API_ATTRS void WorkQueue::Stop() {
if (last_) {
if ((last_->next = firstFree_)) {
last_->next->previous = last_;
}
firstFree_ = first_;
first_ = last_ = nullptr;
}
}
RT_OFFLOAD_API_GROUP_END
} // namespace Fortran::runtime