[flang][cuda] Use a reference for asyncObject (#140614)

Switch from `int64_t` to `int64_t*` to fit with the rest of the
implementation.

New tentative with some fix. The previous was reverted some time ago.

Reviewed in #138010
This commit is contained in:
Valentin Clement (バレンタイン クレメン)
2025-05-19 15:02:53 -07:00
committed by GitHub
parent a04cff172f
commit f5609aa1b0
55 changed files with 183 additions and 184 deletions

View File

@@ -19,7 +19,7 @@
namespace Fortran::runtime {
using AllocFct = void *(*)(std::size_t, std::int64_t);
using AllocFct = void *(*)(std::size_t, std::int64_t *);
using FreeFct = void (*)(void *);
typedef struct Allocator_t {
@@ -28,7 +28,7 @@ typedef struct Allocator_t {
} Allocator_t;
static RT_API_ATTRS void *MallocWrapper(
std::size_t size, [[maybe_unused]] std::int64_t) {
std::size_t size, [[maybe_unused]] std::int64_t *) {
return std::malloc(size);
}
#ifdef RT_DEVICE_COMPILATION

View File

@@ -29,8 +29,8 @@
#include <cstdio>
#include <cstring>
/// Value used for asyncId when no specific stream is specified.
static constexpr std::int64_t kNoAsyncId = -1;
/// Value used for asyncObject when no specific stream is specified.
static constexpr std::int64_t *kNoAsyncObject = nullptr;
namespace Fortran::runtime {
@@ -372,7 +372,7 @@ public:
// before calling. It (re)computes the byte strides after
// allocation. Does not allocate automatic components or
// perform default component initialization.
RT_API_ATTRS int Allocate(std::int64_t asyncId);
RT_API_ATTRS int Allocate(std::int64_t *asyncObject);
RT_API_ATTRS void SetByteStrides();
// Deallocates storage; does not call FINAL subroutines or

View File

@@ -347,7 +347,7 @@ inline RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x,
// as the element size of the source.
result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
CFI_attribute_allocatable);
if (int stat{result.Allocate(kNoAsyncId)}) {
if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
}

View File

@@ -14,6 +14,7 @@ add_flangrt_library(flang_rt.cuda STATIC SHARED
kernel.cpp
memmove-function.cpp
memory.cpp
pointer.cpp
registration.cpp
TARGET_PROPERTIES

View File

@@ -23,7 +23,7 @@ namespace Fortran::runtime::cuda {
extern "C" {
RT_EXT_API_GROUP_BEGIN
int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t *stream,
bool *pinned, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocate)(
@@ -41,7 +41,7 @@ int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
return stat;
}
int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t *stream,
bool *pinned, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
if (desc.HasAddendum()) {
@@ -63,7 +63,7 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
}
int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocate)(
alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
@@ -76,7 +76,7 @@ int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
}
int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc,
const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFAllocatableAllocateSync)(
alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};

View File

@@ -98,7 +98,7 @@ static unsigned findAllocation(void *ptr) {
return allocNotFound;
}
static void insertAllocation(void *ptr, std::size_t size, std::int64_t stream) {
static void insertAllocation(void *ptr, std::size_t size, cudaStream_t stream) {
CriticalSection critical{lock};
initAllocations();
if (numDeviceAllocations >= maxDeviceAllocations) {
@@ -106,7 +106,7 @@ static void insertAllocation(void *ptr, std::size_t size, std::int64_t stream) {
}
deviceAllocations[numDeviceAllocations].ptr = ptr;
deviceAllocations[numDeviceAllocations].size = size;
deviceAllocations[numDeviceAllocations].stream = (cudaStream_t)stream;
deviceAllocations[numDeviceAllocations].stream = stream;
++numDeviceAllocations;
qsort(deviceAllocations, numDeviceAllocations, sizeof(DeviceAllocation),
compareDeviceAlloc);
@@ -136,7 +136,7 @@ void RTDEF(CUFRegisterAllocator)() {
}
void *CUFAllocPinned(
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
void *p;
CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&p, sizeInBytes));
return p;
@@ -144,18 +144,18 @@ void *CUFAllocPinned(
void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); }
void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t asyncId) {
void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t *asyncObject) {
void *p;
if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
CUDA_REPORT_IF_ERROR(
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
} else {
if (asyncId == kNoAsyncId) {
if (asyncObject == kNoAsyncObject) {
CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
} else {
CUDA_REPORT_IF_ERROR(
cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)asyncId));
insertAllocation(p, sizeInBytes, asyncId);
cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)*asyncObject));
insertAllocation(p, sizeInBytes, (cudaStream_t)*asyncObject);
}
}
return p;
@@ -174,7 +174,7 @@ void CUFFreeDevice(void *p) {
}
void *CUFAllocManaged(
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
void *p;
CUDA_REPORT_IF_ERROR(
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
@@ -184,9 +184,9 @@ void *CUFAllocManaged(
void CUFFreeManaged(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); }
void *CUFAllocUnified(
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
// Call alloc managed for the time being.
return CUFAllocManaged(sizeInBytes, asyncId);
return CUFAllocManaged(sizeInBytes, asyncObject);
}
void CUFFreeUnified(void *p) {

View File

@@ -21,7 +21,7 @@ RT_EXT_API_GROUP_BEGIN
Descriptor *RTDEF(CUFAllocDescriptor)(
std::size_t sizeInBytes, const char *sourceFile, int sourceLine) {
return reinterpret_cast<Descriptor *>(
CUFAllocManaged(sizeInBytes, /*asyncId*/ -1));
CUFAllocManaged(sizeInBytes, /*asyncObject=*/nullptr));
}
void RTDEF(CUFFreeDescriptor)(

View File

@@ -22,7 +22,7 @@ namespace Fortran::runtime::cuda {
extern "C" {
RT_EXT_API_GROUP_BEGIN
int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool *pinned,
int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t *stream, bool *pinned,
bool hasStat, const Descriptor *errMsg, const char *sourceFile,
int sourceLine) {
if (desc.HasAddendum()) {
@@ -43,7 +43,7 @@ int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool *pinned,
return stat;
}
int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t stream,
int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t *stream,
bool *pinned, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFPointerAllocate)(
@@ -62,7 +62,7 @@ int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t stream,
}
int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFPointerAllocate)(
pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
@@ -75,7 +75,7 @@ int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
}
int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer,
const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int stat{RTNAME(CUFPointerAllocateSync)(
pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};

View File

@@ -133,17 +133,17 @@ void RTDEF(AllocatableApplyMold)(
}
}
int RTDEF(AllocatableAllocate)(Descriptor &descriptor, std::int64_t asyncId,
bool hasStat, const Descriptor *errMsg, const char *sourceFile,
int sourceLine) {
int RTDEF(AllocatableAllocate)(Descriptor &descriptor,
std::int64_t *asyncObject, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
if (!descriptor.IsAllocatable()) {
return ReturnError(terminator, StatInvalidDescriptor, errMsg, hasStat);
} else if (descriptor.IsAllocated()) {
return ReturnError(terminator, StatBaseNotNull, errMsg, hasStat);
} else {
int stat{
ReturnError(terminator, descriptor.Allocate(asyncId), errMsg, hasStat)};
int stat{ReturnError(
terminator, descriptor.Allocate(asyncObject), errMsg, hasStat)};
if (stat == StatOk) {
if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
if (const auto *derived{addendum->derivedType()}) {
@@ -162,7 +162,7 @@ int RTDEF(AllocatableAllocateSource)(Descriptor &alloc,
const Descriptor &source, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
int stat{RTNAME(AllocatableAllocate)(
alloc, /*asyncId=*/-1, hasStat, errMsg, sourceFile, sourceLine)};
alloc, /*asyncObject=*/nullptr, hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
DoFromSourceAssign(alloc, source, terminator);

View File

@@ -50,7 +50,7 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded(
initialAllocationSize(fromElements, to.ElementBytes())};
to.GetDimension(0).SetBounds(1, allocationSize);
RTNAME(AllocatableAllocate)
(to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
(to, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr,
vector.sourceFile, vector.sourceLine);
to.GetDimension(0).SetBounds(1, fromElements);
vector.actualAllocationSize = allocationSize;
@@ -59,7 +59,7 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded(
// first value: there should be no reallocation.
RUNTIME_CHECK(terminator, previousToElements >= fromElements);
RTNAME(AllocatableAllocate)
(to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
(to, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr,
vector.sourceFile, vector.sourceLine);
vector.actualAllocationSize = previousToElements;
}

View File

@@ -102,7 +102,7 @@ static RT_API_ATTRS int AllocateAssignmentLHS(
toDim.SetByteStride(stride);
stride *= toDim.Extent();
}
int result{ReturnError(terminator, to.Allocate(kNoAsyncId))};
int result{ReturnError(terminator, to.Allocate(kNoAsyncObject))};
if (result == StatOk && derived && !derived->noInitializationNeeded()) {
result = ReturnError(terminator, Initialize(to, *derived, terminator));
}
@@ -280,7 +280,7 @@ RT_API_ATTRS void Assign(Descriptor &to, const Descriptor &from,
// entity, otherwise, the Deallocate() below will not
// free the descriptor memory.
newFrom.raw().attribute = CFI_attribute_allocatable;
auto stat{ReturnError(terminator, newFrom.Allocate(kNoAsyncId))};
auto stat{ReturnError(terminator, newFrom.Allocate(kNoAsyncObject))};
if (stat == StatOk) {
if (HasDynamicComponent(from)) {
// If 'from' has allocatable/automatic component, we cannot

View File

@@ -118,7 +118,7 @@ static RT_API_ATTRS void Compare(Descriptor &result, const Descriptor &x,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, ub[j]);
}
if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("Compare: could not allocate storage for result");
}
std::size_t xChars{x.ElementBytes() >> shift<CHAR>};
@@ -173,7 +173,7 @@ static RT_API_ATTRS void AdjustLRHelper(Descriptor &result,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, ub[j]);
}
if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("ADJUSTL/R: could not allocate storage for result");
}
for (SubscriptValue resultAt{0}; elements-- > 0;
@@ -227,7 +227,7 @@ static RT_API_ATTRS void LenTrim(Descriptor &result, const Descriptor &string,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, ub[j]);
}
if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("LEN_TRIM: could not allocate storage for result");
}
std::size_t stringElementChars{string.ElementBytes() >> shift<CHAR>};
@@ -427,7 +427,7 @@ static RT_API_ATTRS void GeneralCharFunc(Descriptor &result,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, ub[j]);
}
if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("SCAN/VERIFY: could not allocate storage for result");
}
std::size_t stringElementChars{string.ElementBytes() >> shift<CHAR>};
@@ -530,7 +530,8 @@ static RT_API_ATTRS void MaxMinHelper(Descriptor &accumulator,
for (int j{0}; j < rank; ++j) {
accumulator.GetDimension(j).SetBounds(1, ub[j]);
}
RUNTIME_CHECK(terminator, accumulator.Allocate(kNoAsyncId) == CFI_SUCCESS);
RUNTIME_CHECK(
terminator, accumulator.Allocate(kNoAsyncObject) == CFI_SUCCESS);
}
for (CHAR *result{accumulator.OffsetElement<CHAR>()}; elements-- > 0;
accumData += accumChars, result += chars, x.IncrementSubscripts(xAt)) {
@@ -606,7 +607,7 @@ void RTDEF(CharacterConcatenate)(Descriptor &accumulator,
for (int j{0}; j < rank; ++j) {
accumulator.GetDimension(j).SetBounds(1, ub[j]);
}
if (accumulator.Allocate(kNoAsyncId) != CFI_SUCCESS) {
if (accumulator.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash(
"CharacterConcatenate: could not allocate storage for result");
}
@@ -629,7 +630,8 @@ void RTDEF(CharacterConcatenateScalar1)(
accumulator.set_base_addr(nullptr);
std::size_t oldLen{accumulator.ElementBytes()};
accumulator.raw().elem_len += chars;
RUNTIME_CHECK(terminator, accumulator.Allocate(kNoAsyncId) == CFI_SUCCESS);
RUNTIME_CHECK(
terminator, accumulator.Allocate(kNoAsyncObject) == CFI_SUCCESS);
std::memcpy(accumulator.OffsetElement<char>(oldLen), from, chars);
FreeMemory(old);
}
@@ -831,7 +833,7 @@ void RTDEF(Repeat)(Descriptor &result, const Descriptor &string,
std::size_t origBytes{string.ElementBytes()};
result.Establish(string.type(), origBytes * ncopies, nullptr, 0, nullptr,
CFI_attribute_allocatable);
if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
terminator.Crash("REPEAT could not allocate storage for result");
}
const char *from{string.OffsetElement()};
@@ -865,7 +867,7 @@ void RTDEF(Trim)(Descriptor &result, const Descriptor &string,
}
result.Establish(string.type(), resultBytes, nullptr, 0, nullptr,
CFI_attribute_allocatable);
RUNTIME_CHECK(terminator, result.Allocate(kNoAsyncId) == CFI_SUCCESS);
RUNTIME_CHECK(terminator, result.Allocate(kNoAsyncObject) == CFI_SUCCESS);
std::memcpy(result.OffsetElement(), string.OffsetElement(), resultBytes);
}

View File

@@ -171,8 +171,8 @@ RT_API_ATTRS void CopyElement(const Descriptor &to, const SubscriptValue toAt[],
*reinterpret_cast<Descriptor *>(toPtr + component->offset())};
if (toDesc.raw().base_addr != nullptr) {
toDesc.set_base_addr(nullptr);
RUNTIME_CHECK(
terminator, toDesc.Allocate(/*asyncId=*/-1) == CFI_SUCCESS);
RUNTIME_CHECK(terminator,
toDesc.Allocate(/*asyncObject=*/nullptr) == CFI_SUCCESS);
const Descriptor &fromDesc{*reinterpret_cast<const Descriptor *>(
fromPtr + component->offset())};
copyStack.emplace(toDesc, fromDesc);

View File

@@ -52,7 +52,7 @@ RT_API_ATTRS int Initialize(const Descriptor &instance,
allocDesc.raw().attribute = CFI_attribute_allocatable;
if (comp.genre() == typeInfo::Component::Genre::Automatic) {
stat = ReturnError(
terminator, allocDesc.Allocate(kNoAsyncId), errMsg, hasStat);
terminator, allocDesc.Allocate(kNoAsyncObject), errMsg, hasStat);
if (stat == StatOk) {
if (const DescriptorAddendum * addendum{allocDesc.Addendum()}) {
if (const auto *derived{addendum->derivedType()}) {
@@ -153,7 +153,7 @@ RT_API_ATTRS int InitializeClone(const Descriptor &clone,
if (origDesc.IsAllocated()) {
cloneDesc.ApplyMold(origDesc, origDesc.rank());
stat = ReturnError(
terminator, cloneDesc.Allocate(kNoAsyncId), errMsg, hasStat);
terminator, cloneDesc.Allocate(kNoAsyncObject), errMsg, hasStat);
if (stat == StatOk) {
if (const DescriptorAddendum * addendum{cloneDesc.Addendum()}) {
if (const typeInfo::DerivedType *
@@ -260,7 +260,7 @@ static RT_API_ATTRS void CallFinalSubroutine(const Descriptor &descriptor,
copy.raw().attribute = CFI_attribute_allocatable;
Terminator stubTerminator{"CallFinalProcedure() in Fortran runtime", 0};
RUNTIME_CHECK(terminator ? *terminator : stubTerminator,
copy.Allocate(kNoAsyncId) == CFI_SUCCESS);
copy.Allocate(kNoAsyncObject) == CFI_SUCCESS);
ShallowCopyDiscontiguousToContiguous(copy, descriptor);
argDescriptor = &copy;
}

View File

@@ -158,7 +158,7 @@ RT_API_ATTRS static inline int MapAllocIdx(const Descriptor &desc) {
#endif
}
RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) {
RT_API_ATTRS int Descriptor::Allocate(std::int64_t *asyncObject) {
std::size_t elementBytes{ElementBytes()};
if (static_cast<std::int64_t>(elementBytes) < 0) {
// F'2023 7.4.4.2 p5: "If the character length parameter value evaluates
@@ -170,7 +170,7 @@ RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) {
// Zero size allocation is possible in Fortran and the resulting
// descriptor must be allocated/associated. Since std::malloc(0)
// result is implementation defined, always allocate at least one byte.
void *p{alloc(byteSize ? byteSize : 1, asyncId)};
void *p{alloc(byteSize ? byteSize : 1, asyncObject)};
if (!p) {
return CFI_ERROR_MEM_ALLOCATION;
}

View File

@@ -152,7 +152,7 @@ inline RT_API_ATTRS void CharacterMaxOrMinLoc(const char *intrinsic,
CFI_attribute_allocatable);
result.GetDimension(0).SetBounds(1, extent[0]);
Terminator terminator{source, line};
if (int stat{result.Allocate(kNoAsyncId)}) {
if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
}
@@ -181,7 +181,7 @@ inline RT_API_ATTRS void TotalNumericMaxOrMinLoc(const char *intrinsic,
CFI_attribute_allocatable);
result.GetDimension(0).SetBounds(1, extent[0]);
Terminator terminator{source, line};
if (int stat{result.Allocate(kNoAsyncId)}) {
if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
}

View File

@@ -220,7 +220,7 @@ void RTDEF(Findloc)(Descriptor &result, const Descriptor &x,
CFI_attribute_allocatable);
result.GetDimension(0).SetBounds(1, extent[0]);
Terminator terminator{source, line};
if (int stat{result.Allocate(kNoAsyncId)}) {
if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"FINDLOC: could not allocate memory for result; STAT=%d", stat);
}

View File

@@ -183,7 +183,7 @@ inline static RT_API_ATTRS void DoMatmulTranspose(
for (int j{0}; j < resRank; ++j) {
result.GetDimension(j).SetBounds(1, extent[j]);
}
if (int stat{result.Allocate(kNoAsyncId)}) {
if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"MATMUL-TRANSPOSE: could not allocate memory for result; STAT=%d",
stat);

View File

@@ -255,7 +255,7 @@ static inline RT_API_ATTRS void DoMatmul(
for (int j{0}; j < resRank; ++j) {
result.GetDimension(j).SetBounds(1, extent[j]);
}
if (int stat{result.Allocate(kNoAsyncId)}) {
if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"MATMUL: could not allocate memory for result; STAT=%d", stat);
}

View File

@@ -30,7 +30,7 @@ static RT_API_ATTRS void TransferImpl(Descriptor &result,
if (const DescriptorAddendum * addendum{mold.Addendum()}) {
*result.Addendum() = *addendum;
}
if (int stat{result.Allocate(kNoAsyncId)}) {
if (int stat{result.Allocate(kNoAsyncObject)}) {
Terminator{sourceFile, line}.Crash(
"TRANSFER: could not allocate memory for result; STAT=%d", stat);
}

View File

@@ -129,7 +129,7 @@ RT_API_ATTRS void *AllocateValidatedPointerPayload(
byteSize = ((byteSize + align - 1) / align) * align;
std::size_t total{byteSize + sizeof(std::uintptr_t)};
AllocFct alloc{allocatorRegistry.GetAllocator(allocatorIdx)};
void *p{alloc(total, /*asyncId=*/-1)};
void *p{alloc(total, /*asyncObject=*/nullptr)};
if (p && allocatorIdx == 0) {
// Fill the footer word with the XOR of the ones' complement of
// the base address, which is a value that would be highly unlikely

View File

@@ -148,7 +148,7 @@ void DescriptorStorage<COPY_VALUES>::push(const Descriptor &source) {
if constexpr (COPY_VALUES) {
// copy the data pointed to by the box
box.set_base_addr(nullptr);
box.Allocate(kNoAsyncId);
box.Allocate(kNoAsyncObject);
RTNAME(AssignTemporary)
(box, source, terminator_.sourceFileName(), terminator_.sourceLine());
}

View File

@@ -261,7 +261,7 @@ RT_API_ATTRS void CreatePartialReductionResult(Descriptor &result,
for (int j{0}; j + 1 < xRank; ++j) {
result.GetDimension(j).SetBounds(1, resultExtent[j]);
}
if (int stat{result.Allocate(kNoAsyncId)}) {
if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
}

View File

@@ -132,7 +132,7 @@ static inline RT_API_ATTRS std::size_t AllocateResult(Descriptor &result,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, extent[j]);
}
if (int stat{result.Allocate(kNoAsyncId)}) {
if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"%s: Could not allocate memory for result (stat=%d)", function, stat);
}
@@ -157,7 +157,7 @@ static inline RT_API_ATTRS std::size_t AllocateBesselResult(Descriptor &result,
for (int j{0}; j < rank; ++j) {
result.GetDimension(j).SetBounds(1, extent[j]);
}
if (int stat{result.Allocate(kNoAsyncId)}) {
if (int stat{result.Allocate(kNoAsyncObject)}) {
terminator.Crash(
"%s: Could not allocate memory for result (stat=%d)", function, stat);
}

View File

@@ -26,7 +26,7 @@ int main() {
for (int j{0}; j < 3; ++j) {
source->GetDimension(j).SetBounds(1, sourceExtent[j]);
}
TEST(source->Allocate(kNoAsyncId) == CFI_SUCCESS);
TEST(source->Allocate(kNoAsyncObject) == CFI_SUCCESS);
TEST(source->IsAllocated());
MATCH(2, source->GetDimension(0).Extent());
MATCH(3, source->GetDimension(1).Extent());

View File

@@ -26,7 +26,7 @@ TEST(AllocatableTest, MoveAlloc) {
auto b{createAllocatable(TypeCategory::Integer, 4)};
// ALLOCATE(a(20))
a->GetDimension(0).SetBounds(1, 20);
a->Allocate(kNoAsyncId);
a->Allocate(kNoAsyncObject);
EXPECT_TRUE(a->IsAllocated());
EXPECT_FALSE(b->IsAllocated());
@@ -46,7 +46,7 @@ TEST(AllocatableTest, MoveAlloc) {
// move_alloc with errMsg
auto errMsg{Descriptor::Create(
sizeof(char), 64, nullptr, 0, nullptr, CFI_attribute_allocatable)};
errMsg->Allocate(kNoAsyncId);
errMsg->Allocate(kNoAsyncObject);
RTNAME(MoveAlloc)(*b, *a, nullptr, false, errMsg.get(), __FILE__, __LINE__);
EXPECT_FALSE(a->IsAllocated());
EXPECT_TRUE(b->IsAllocated());

View File

@@ -42,7 +42,8 @@ TEST(AllocatableCUFTest, SimpleDeviceAllocatable) {
CUDA_REPORT_IF_ERROR(cudaMalloc(&device_desc, a->SizeInBytes()));
RTNAME(AllocatableAllocate)
(*a, kNoAsyncId, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
(*a, kNoAsyncObject, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
__LINE__);
EXPECT_TRUE(a->IsAllocated());
RTNAME(CUFDescriptorSync)(device_desc, a.get(), __FILE__, __LINE__);
cudaDeviceSynchronize();
@@ -82,19 +83,22 @@ TEST(AllocatableCUFTest, StreamDeviceAllocatable) {
RTNAME(AllocatableSetBounds)(*c, 0, 1, 100);
RTNAME(AllocatableAllocate)
(*a, 1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
(*a, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
__LINE__);
EXPECT_TRUE(a->IsAllocated());
cudaDeviceSynchronize();
EXPECT_EQ(cudaSuccess, cudaGetLastError());
RTNAME(AllocatableAllocate)
(*b, 1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
(*b, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
__LINE__);
EXPECT_TRUE(b->IsAllocated());
cudaDeviceSynchronize();
EXPECT_EQ(cudaSuccess, cudaGetLastError());
RTNAME(AllocatableAllocate)
(*c, 1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
(*c, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
__LINE__);
EXPECT_TRUE(c->IsAllocated());
cudaDeviceSynchronize();
EXPECT_EQ(cudaSuccess, cudaGetLastError());

View File

@@ -35,7 +35,7 @@ TEST(AllocatableCUFTest, SimpleDeviceAllocate) {
EXPECT_FALSE(a->HasAddendum());
RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
RTNAME(AllocatableAllocate)
(*a, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
(*a, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
__LINE__);
EXPECT_TRUE(a->IsAllocated());
RTNAME(AllocatableDeallocate)
@@ -54,7 +54,7 @@ TEST(AllocatableCUFTest, SimplePinnedAllocate) {
EXPECT_FALSE(a->HasAddendum());
RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
RTNAME(AllocatableAllocate)
(*a, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
(*a, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
__LINE__);
EXPECT_TRUE(a->IsAllocated());
RTNAME(AllocatableDeallocate)

View File

@@ -50,8 +50,8 @@ TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx());
RTNAME(AllocatableSetBounds)(*dev, 0, 1, 10);
RTNAME(AllocatableAllocate)
(*dev, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
__LINE__);
(*dev, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr,
__FILE__, __LINE__);
EXPECT_TRUE(dev->IsAllocated());
// Create temp array to transfer to device.

View File

@@ -35,7 +35,7 @@ OwningPtr<Descriptor> CreateDescriptor(const std::vector<SubscriptValue> &shape,
for (int j{0}; j < rank; ++j) {
descriptor->GetDimension(j).SetBounds(2, shape[j] + 1);
}
if (descriptor->Allocate(kNoAsyncId) != 0) {
if (descriptor->Allocate(kNoAsyncObject) != 0) {
return nullptr;
}

View File

@@ -26,7 +26,7 @@ template <std::size_t n = 64>
static OwningPtr<Descriptor> CreateEmptyCharDescriptor() {
OwningPtr<Descriptor> descriptor{Descriptor::Create(
sizeof(char), n, nullptr, 0, nullptr, CFI_attribute_allocatable)};
if (descriptor->Allocate(kNoAsyncId) != 0) {
if (descriptor->Allocate(kNoAsyncObject) != 0) {
return nullptr;
}
return descriptor;
@@ -36,7 +36,7 @@ static OwningPtr<Descriptor> CharDescriptor(const char *value) {
std::size_t n{std::strlen(value)};
OwningPtr<Descriptor> descriptor{Descriptor::Create(
sizeof(char), n, nullptr, 0, nullptr, CFI_attribute_allocatable)};
if (descriptor->Allocate(kNoAsyncId) != 0) {
if (descriptor->Allocate(kNoAsyncObject) != 0) {
return nullptr;
}
std::memcpy(descriptor->OffsetElement(), value, n);
@@ -47,7 +47,7 @@ template <int kind = sizeof(std::int64_t)>
static OwningPtr<Descriptor> EmptyIntDescriptor() {
OwningPtr<Descriptor> descriptor{Descriptor::Create(TypeCategory::Integer,
kind, nullptr, 0, nullptr, CFI_attribute_allocatable)};
if (descriptor->Allocate(kNoAsyncId) != 0) {
if (descriptor->Allocate(kNoAsyncObject) != 0) {
return nullptr;
}
return descriptor;
@@ -57,7 +57,7 @@ template <int kind = sizeof(std::int64_t)>
static OwningPtr<Descriptor> IntDescriptor(const int &value) {
OwningPtr<Descriptor> descriptor{Descriptor::Create(TypeCategory::Integer,
kind, nullptr, 0, nullptr, CFI_attribute_allocatable)};
if (descriptor->Allocate(kNoAsyncId) != 0) {
if (descriptor->Allocate(kNoAsyncObject) != 0) {
return nullptr;
}
std::memcpy(descriptor->OffsetElement<int>(), &value, sizeof(int));

View File

@@ -59,7 +59,7 @@ TEST(TemporaryStack, ValueStackBasic) {
Descriptor &outputDesc2{testDescriptorStorage[2].descriptor()};
inputDesc.Establish(code, elementBytes, descriptorPtr, rank, extent);
inputDesc.Allocate(kNoAsyncId);
inputDesc.Allocate(kNoAsyncObject);
ASSERT_EQ(inputDesc.IsAllocated(), true);
uint32_t *inputData = static_cast<uint32_t *>(inputDesc.raw().base_addr);
for (std::size_t i = 0; i < inputDesc.Elements(); ++i) {
@@ -123,7 +123,7 @@ TEST(TemporaryStack, ValueStackMultiSize) {
boxDims.extent = extent[dim];
boxDims.sm = elementBytes;
}
desc->Allocate(kNoAsyncId);
desc->Allocate(kNoAsyncObject);
// fill the array with some data to test
for (uint32_t i = 0; i < desc->Elements(); ++i) {

View File

@@ -42,7 +42,7 @@ static OwningPtr<Descriptor> MakeArray(const std::vector<int> &shape,
for (int j{0}; j < rank; ++j) {
result->GetDimension(j).SetBounds(1, shape[j]);
}
int stat{result->Allocate(kNoAsyncId)};
int stat{result->Allocate(kNoAsyncObject)};
EXPECT_EQ(stat, 0) << stat;
EXPECT_LE(data.size(), result->Elements());
char *p{result->OffsetElement<char>()};