[flang][cuda] Use a reference for asyncObject (#140614)
Switch from `int64_t` to `int64_t*` to fit with the rest of the implementation. New tentative with some fix. The previous was reverted some time ago. Reviewed in #138010
This commit is contained in:
committed by
GitHub
parent
a04cff172f
commit
f5609aa1b0
@@ -19,7 +19,7 @@
|
||||
|
||||
namespace Fortran::runtime {
|
||||
|
||||
using AllocFct = void *(*)(std::size_t, std::int64_t);
|
||||
using AllocFct = void *(*)(std::size_t, std::int64_t *);
|
||||
using FreeFct = void (*)(void *);
|
||||
|
||||
typedef struct Allocator_t {
|
||||
@@ -28,7 +28,7 @@ typedef struct Allocator_t {
|
||||
} Allocator_t;
|
||||
|
||||
static RT_API_ATTRS void *MallocWrapper(
|
||||
std::size_t size, [[maybe_unused]] std::int64_t) {
|
||||
std::size_t size, [[maybe_unused]] std::int64_t *) {
|
||||
return std::malloc(size);
|
||||
}
|
||||
#ifdef RT_DEVICE_COMPILATION
|
||||
|
||||
@@ -29,8 +29,8 @@
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
||||
/// Value used for asyncId when no specific stream is specified.
|
||||
static constexpr std::int64_t kNoAsyncId = -1;
|
||||
/// Value used for asyncObject when no specific stream is specified.
|
||||
static constexpr std::int64_t *kNoAsyncObject = nullptr;
|
||||
|
||||
namespace Fortran::runtime {
|
||||
|
||||
@@ -372,7 +372,7 @@ public:
|
||||
// before calling. It (re)computes the byte strides after
|
||||
// allocation. Does not allocate automatic components or
|
||||
// perform default component initialization.
|
||||
RT_API_ATTRS int Allocate(std::int64_t asyncId);
|
||||
RT_API_ATTRS int Allocate(std::int64_t *asyncObject);
|
||||
RT_API_ATTRS void SetByteStrides();
|
||||
|
||||
// Deallocates storage; does not call FINAL subroutines or
|
||||
|
||||
@@ -347,7 +347,7 @@ inline RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x,
|
||||
// as the element size of the source.
|
||||
result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
|
||||
CFI_attribute_allocatable);
|
||||
if (int stat{result.Allocate(kNoAsyncId)}) {
|
||||
if (int stat{result.Allocate(kNoAsyncObject)}) {
|
||||
terminator.Crash(
|
||||
"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ add_flangrt_library(flang_rt.cuda STATIC SHARED
|
||||
kernel.cpp
|
||||
memmove-function.cpp
|
||||
memory.cpp
|
||||
pointer.cpp
|
||||
registration.cpp
|
||||
|
||||
TARGET_PROPERTIES
|
||||
|
||||
@@ -23,7 +23,7 @@ namespace Fortran::runtime::cuda {
|
||||
extern "C" {
|
||||
RT_EXT_API_GROUP_BEGIN
|
||||
|
||||
int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
|
||||
int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t *stream,
|
||||
bool *pinned, bool hasStat, const Descriptor *errMsg,
|
||||
const char *sourceFile, int sourceLine) {
|
||||
int stat{RTNAME(CUFAllocatableAllocate)(
|
||||
@@ -41,7 +41,7 @@ int RTDEF(CUFAllocatableAllocateSync)(Descriptor &desc, int64_t stream,
|
||||
return stat;
|
||||
}
|
||||
|
||||
int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
|
||||
int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t *stream,
|
||||
bool *pinned, bool hasStat, const Descriptor *errMsg,
|
||||
const char *sourceFile, int sourceLine) {
|
||||
if (desc.HasAddendum()) {
|
||||
@@ -63,7 +63,7 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
|
||||
}
|
||||
|
||||
int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
|
||||
const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
|
||||
const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
|
||||
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
|
||||
int stat{RTNAME(CUFAllocatableAllocate)(
|
||||
alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
|
||||
@@ -76,7 +76,7 @@ int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc,
|
||||
}
|
||||
|
||||
int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc,
|
||||
const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
|
||||
const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
|
||||
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
|
||||
int stat{RTNAME(CUFAllocatableAllocateSync)(
|
||||
alloc, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
|
||||
|
||||
@@ -98,7 +98,7 @@ static unsigned findAllocation(void *ptr) {
|
||||
return allocNotFound;
|
||||
}
|
||||
|
||||
static void insertAllocation(void *ptr, std::size_t size, std::int64_t stream) {
|
||||
static void insertAllocation(void *ptr, std::size_t size, cudaStream_t stream) {
|
||||
CriticalSection critical{lock};
|
||||
initAllocations();
|
||||
if (numDeviceAllocations >= maxDeviceAllocations) {
|
||||
@@ -106,7 +106,7 @@ static void insertAllocation(void *ptr, std::size_t size, std::int64_t stream) {
|
||||
}
|
||||
deviceAllocations[numDeviceAllocations].ptr = ptr;
|
||||
deviceAllocations[numDeviceAllocations].size = size;
|
||||
deviceAllocations[numDeviceAllocations].stream = (cudaStream_t)stream;
|
||||
deviceAllocations[numDeviceAllocations].stream = stream;
|
||||
++numDeviceAllocations;
|
||||
qsort(deviceAllocations, numDeviceAllocations, sizeof(DeviceAllocation),
|
||||
compareDeviceAlloc);
|
||||
@@ -136,7 +136,7 @@ void RTDEF(CUFRegisterAllocator)() {
|
||||
}
|
||||
|
||||
void *CUFAllocPinned(
|
||||
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
|
||||
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
|
||||
void *p;
|
||||
CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&p, sizeInBytes));
|
||||
return p;
|
||||
@@ -144,18 +144,18 @@ void *CUFAllocPinned(
|
||||
|
||||
void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); }
|
||||
|
||||
void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t asyncId) {
|
||||
void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t *asyncObject) {
|
||||
void *p;
|
||||
if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
|
||||
CUDA_REPORT_IF_ERROR(
|
||||
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
|
||||
} else {
|
||||
if (asyncId == kNoAsyncId) {
|
||||
if (asyncObject == kNoAsyncObject) {
|
||||
CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
|
||||
} else {
|
||||
CUDA_REPORT_IF_ERROR(
|
||||
cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)asyncId));
|
||||
insertAllocation(p, sizeInBytes, asyncId);
|
||||
cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)*asyncObject));
|
||||
insertAllocation(p, sizeInBytes, (cudaStream_t)*asyncObject);
|
||||
}
|
||||
}
|
||||
return p;
|
||||
@@ -174,7 +174,7 @@ void CUFFreeDevice(void *p) {
|
||||
}
|
||||
|
||||
void *CUFAllocManaged(
|
||||
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
|
||||
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
|
||||
void *p;
|
||||
CUDA_REPORT_IF_ERROR(
|
||||
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
|
||||
@@ -184,9 +184,9 @@ void *CUFAllocManaged(
|
||||
void CUFFreeManaged(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); }
|
||||
|
||||
void *CUFAllocUnified(
|
||||
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
|
||||
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) {
|
||||
// Call alloc managed for the time being.
|
||||
return CUFAllocManaged(sizeInBytes, asyncId);
|
||||
return CUFAllocManaged(sizeInBytes, asyncObject);
|
||||
}
|
||||
|
||||
void CUFFreeUnified(void *p) {
|
||||
|
||||
@@ -21,7 +21,7 @@ RT_EXT_API_GROUP_BEGIN
|
||||
Descriptor *RTDEF(CUFAllocDescriptor)(
|
||||
std::size_t sizeInBytes, const char *sourceFile, int sourceLine) {
|
||||
return reinterpret_cast<Descriptor *>(
|
||||
CUFAllocManaged(sizeInBytes, /*asyncId*/ -1));
|
||||
CUFAllocManaged(sizeInBytes, /*asyncObject=*/nullptr));
|
||||
}
|
||||
|
||||
void RTDEF(CUFFreeDescriptor)(
|
||||
|
||||
@@ -22,7 +22,7 @@ namespace Fortran::runtime::cuda {
|
||||
extern "C" {
|
||||
RT_EXT_API_GROUP_BEGIN
|
||||
|
||||
int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool *pinned,
|
||||
int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t *stream, bool *pinned,
|
||||
bool hasStat, const Descriptor *errMsg, const char *sourceFile,
|
||||
int sourceLine) {
|
||||
if (desc.HasAddendum()) {
|
||||
@@ -43,7 +43,7 @@ int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool *pinned,
|
||||
return stat;
|
||||
}
|
||||
|
||||
int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t stream,
|
||||
int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t *stream,
|
||||
bool *pinned, bool hasStat, const Descriptor *errMsg,
|
||||
const char *sourceFile, int sourceLine) {
|
||||
int stat{RTNAME(CUFPointerAllocate)(
|
||||
@@ -62,7 +62,7 @@ int RTDEF(CUFPointerAllocateSync)(Descriptor &desc, int64_t stream,
|
||||
}
|
||||
|
||||
int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
|
||||
const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
|
||||
const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
|
||||
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
|
||||
int stat{RTNAME(CUFPointerAllocate)(
|
||||
pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
|
||||
@@ -75,7 +75,7 @@ int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer,
|
||||
}
|
||||
|
||||
int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer,
|
||||
const Descriptor &source, int64_t stream, bool *pinned, bool hasStat,
|
||||
const Descriptor &source, int64_t *stream, bool *pinned, bool hasStat,
|
||||
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
|
||||
int stat{RTNAME(CUFPointerAllocateSync)(
|
||||
pointer, stream, pinned, hasStat, errMsg, sourceFile, sourceLine)};
|
||||
|
||||
@@ -133,17 +133,17 @@ void RTDEF(AllocatableApplyMold)(
|
||||
}
|
||||
}
|
||||
|
||||
int RTDEF(AllocatableAllocate)(Descriptor &descriptor, std::int64_t asyncId,
|
||||
bool hasStat, const Descriptor *errMsg, const char *sourceFile,
|
||||
int sourceLine) {
|
||||
int RTDEF(AllocatableAllocate)(Descriptor &descriptor,
|
||||
std::int64_t *asyncObject, bool hasStat, const Descriptor *errMsg,
|
||||
const char *sourceFile, int sourceLine) {
|
||||
Terminator terminator{sourceFile, sourceLine};
|
||||
if (!descriptor.IsAllocatable()) {
|
||||
return ReturnError(terminator, StatInvalidDescriptor, errMsg, hasStat);
|
||||
} else if (descriptor.IsAllocated()) {
|
||||
return ReturnError(terminator, StatBaseNotNull, errMsg, hasStat);
|
||||
} else {
|
||||
int stat{
|
||||
ReturnError(terminator, descriptor.Allocate(asyncId), errMsg, hasStat)};
|
||||
int stat{ReturnError(
|
||||
terminator, descriptor.Allocate(asyncObject), errMsg, hasStat)};
|
||||
if (stat == StatOk) {
|
||||
if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
|
||||
if (const auto *derived{addendum->derivedType()}) {
|
||||
@@ -162,7 +162,7 @@ int RTDEF(AllocatableAllocateSource)(Descriptor &alloc,
|
||||
const Descriptor &source, bool hasStat, const Descriptor *errMsg,
|
||||
const char *sourceFile, int sourceLine) {
|
||||
int stat{RTNAME(AllocatableAllocate)(
|
||||
alloc, /*asyncId=*/-1, hasStat, errMsg, sourceFile, sourceLine)};
|
||||
alloc, /*asyncObject=*/nullptr, hasStat, errMsg, sourceFile, sourceLine)};
|
||||
if (stat == StatOk) {
|
||||
Terminator terminator{sourceFile, sourceLine};
|
||||
DoFromSourceAssign(alloc, source, terminator);
|
||||
|
||||
@@ -50,7 +50,7 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded(
|
||||
initialAllocationSize(fromElements, to.ElementBytes())};
|
||||
to.GetDimension(0).SetBounds(1, allocationSize);
|
||||
RTNAME(AllocatableAllocate)
|
||||
(to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
|
||||
(to, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr,
|
||||
vector.sourceFile, vector.sourceLine);
|
||||
to.GetDimension(0).SetBounds(1, fromElements);
|
||||
vector.actualAllocationSize = allocationSize;
|
||||
@@ -59,7 +59,7 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded(
|
||||
// first value: there should be no reallocation.
|
||||
RUNTIME_CHECK(terminator, previousToElements >= fromElements);
|
||||
RTNAME(AllocatableAllocate)
|
||||
(to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
|
||||
(to, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr,
|
||||
vector.sourceFile, vector.sourceLine);
|
||||
vector.actualAllocationSize = previousToElements;
|
||||
}
|
||||
|
||||
@@ -102,7 +102,7 @@ static RT_API_ATTRS int AllocateAssignmentLHS(
|
||||
toDim.SetByteStride(stride);
|
||||
stride *= toDim.Extent();
|
||||
}
|
||||
int result{ReturnError(terminator, to.Allocate(kNoAsyncId))};
|
||||
int result{ReturnError(terminator, to.Allocate(kNoAsyncObject))};
|
||||
if (result == StatOk && derived && !derived->noInitializationNeeded()) {
|
||||
result = ReturnError(terminator, Initialize(to, *derived, terminator));
|
||||
}
|
||||
@@ -280,7 +280,7 @@ RT_API_ATTRS void Assign(Descriptor &to, const Descriptor &from,
|
||||
// entity, otherwise, the Deallocate() below will not
|
||||
// free the descriptor memory.
|
||||
newFrom.raw().attribute = CFI_attribute_allocatable;
|
||||
auto stat{ReturnError(terminator, newFrom.Allocate(kNoAsyncId))};
|
||||
auto stat{ReturnError(terminator, newFrom.Allocate(kNoAsyncObject))};
|
||||
if (stat == StatOk) {
|
||||
if (HasDynamicComponent(from)) {
|
||||
// If 'from' has allocatable/automatic component, we cannot
|
||||
|
||||
@@ -118,7 +118,7 @@ static RT_API_ATTRS void Compare(Descriptor &result, const Descriptor &x,
|
||||
for (int j{0}; j < rank; ++j) {
|
||||
result.GetDimension(j).SetBounds(1, ub[j]);
|
||||
}
|
||||
if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
|
||||
if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
|
||||
terminator.Crash("Compare: could not allocate storage for result");
|
||||
}
|
||||
std::size_t xChars{x.ElementBytes() >> shift<CHAR>};
|
||||
@@ -173,7 +173,7 @@ static RT_API_ATTRS void AdjustLRHelper(Descriptor &result,
|
||||
for (int j{0}; j < rank; ++j) {
|
||||
result.GetDimension(j).SetBounds(1, ub[j]);
|
||||
}
|
||||
if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
|
||||
if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
|
||||
terminator.Crash("ADJUSTL/R: could not allocate storage for result");
|
||||
}
|
||||
for (SubscriptValue resultAt{0}; elements-- > 0;
|
||||
@@ -227,7 +227,7 @@ static RT_API_ATTRS void LenTrim(Descriptor &result, const Descriptor &string,
|
||||
for (int j{0}; j < rank; ++j) {
|
||||
result.GetDimension(j).SetBounds(1, ub[j]);
|
||||
}
|
||||
if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
|
||||
if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
|
||||
terminator.Crash("LEN_TRIM: could not allocate storage for result");
|
||||
}
|
||||
std::size_t stringElementChars{string.ElementBytes() >> shift<CHAR>};
|
||||
@@ -427,7 +427,7 @@ static RT_API_ATTRS void GeneralCharFunc(Descriptor &result,
|
||||
for (int j{0}; j < rank; ++j) {
|
||||
result.GetDimension(j).SetBounds(1, ub[j]);
|
||||
}
|
||||
if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
|
||||
if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
|
||||
terminator.Crash("SCAN/VERIFY: could not allocate storage for result");
|
||||
}
|
||||
std::size_t stringElementChars{string.ElementBytes() >> shift<CHAR>};
|
||||
@@ -530,7 +530,8 @@ static RT_API_ATTRS void MaxMinHelper(Descriptor &accumulator,
|
||||
for (int j{0}; j < rank; ++j) {
|
||||
accumulator.GetDimension(j).SetBounds(1, ub[j]);
|
||||
}
|
||||
RUNTIME_CHECK(terminator, accumulator.Allocate(kNoAsyncId) == CFI_SUCCESS);
|
||||
RUNTIME_CHECK(
|
||||
terminator, accumulator.Allocate(kNoAsyncObject) == CFI_SUCCESS);
|
||||
}
|
||||
for (CHAR *result{accumulator.OffsetElement<CHAR>()}; elements-- > 0;
|
||||
accumData += accumChars, result += chars, x.IncrementSubscripts(xAt)) {
|
||||
@@ -606,7 +607,7 @@ void RTDEF(CharacterConcatenate)(Descriptor &accumulator,
|
||||
for (int j{0}; j < rank; ++j) {
|
||||
accumulator.GetDimension(j).SetBounds(1, ub[j]);
|
||||
}
|
||||
if (accumulator.Allocate(kNoAsyncId) != CFI_SUCCESS) {
|
||||
if (accumulator.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
|
||||
terminator.Crash(
|
||||
"CharacterConcatenate: could not allocate storage for result");
|
||||
}
|
||||
@@ -629,7 +630,8 @@ void RTDEF(CharacterConcatenateScalar1)(
|
||||
accumulator.set_base_addr(nullptr);
|
||||
std::size_t oldLen{accumulator.ElementBytes()};
|
||||
accumulator.raw().elem_len += chars;
|
||||
RUNTIME_CHECK(terminator, accumulator.Allocate(kNoAsyncId) == CFI_SUCCESS);
|
||||
RUNTIME_CHECK(
|
||||
terminator, accumulator.Allocate(kNoAsyncObject) == CFI_SUCCESS);
|
||||
std::memcpy(accumulator.OffsetElement<char>(oldLen), from, chars);
|
||||
FreeMemory(old);
|
||||
}
|
||||
@@ -831,7 +833,7 @@ void RTDEF(Repeat)(Descriptor &result, const Descriptor &string,
|
||||
std::size_t origBytes{string.ElementBytes()};
|
||||
result.Establish(string.type(), origBytes * ncopies, nullptr, 0, nullptr,
|
||||
CFI_attribute_allocatable);
|
||||
if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) {
|
||||
if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) {
|
||||
terminator.Crash("REPEAT could not allocate storage for result");
|
||||
}
|
||||
const char *from{string.OffsetElement()};
|
||||
@@ -865,7 +867,7 @@ void RTDEF(Trim)(Descriptor &result, const Descriptor &string,
|
||||
}
|
||||
result.Establish(string.type(), resultBytes, nullptr, 0, nullptr,
|
||||
CFI_attribute_allocatable);
|
||||
RUNTIME_CHECK(terminator, result.Allocate(kNoAsyncId) == CFI_SUCCESS);
|
||||
RUNTIME_CHECK(terminator, result.Allocate(kNoAsyncObject) == CFI_SUCCESS);
|
||||
std::memcpy(result.OffsetElement(), string.OffsetElement(), resultBytes);
|
||||
}
|
||||
|
||||
|
||||
@@ -171,8 +171,8 @@ RT_API_ATTRS void CopyElement(const Descriptor &to, const SubscriptValue toAt[],
|
||||
*reinterpret_cast<Descriptor *>(toPtr + component->offset())};
|
||||
if (toDesc.raw().base_addr != nullptr) {
|
||||
toDesc.set_base_addr(nullptr);
|
||||
RUNTIME_CHECK(
|
||||
terminator, toDesc.Allocate(/*asyncId=*/-1) == CFI_SUCCESS);
|
||||
RUNTIME_CHECK(terminator,
|
||||
toDesc.Allocate(/*asyncObject=*/nullptr) == CFI_SUCCESS);
|
||||
const Descriptor &fromDesc{*reinterpret_cast<const Descriptor *>(
|
||||
fromPtr + component->offset())};
|
||||
copyStack.emplace(toDesc, fromDesc);
|
||||
|
||||
@@ -52,7 +52,7 @@ RT_API_ATTRS int Initialize(const Descriptor &instance,
|
||||
allocDesc.raw().attribute = CFI_attribute_allocatable;
|
||||
if (comp.genre() == typeInfo::Component::Genre::Automatic) {
|
||||
stat = ReturnError(
|
||||
terminator, allocDesc.Allocate(kNoAsyncId), errMsg, hasStat);
|
||||
terminator, allocDesc.Allocate(kNoAsyncObject), errMsg, hasStat);
|
||||
if (stat == StatOk) {
|
||||
if (const DescriptorAddendum * addendum{allocDesc.Addendum()}) {
|
||||
if (const auto *derived{addendum->derivedType()}) {
|
||||
@@ -153,7 +153,7 @@ RT_API_ATTRS int InitializeClone(const Descriptor &clone,
|
||||
if (origDesc.IsAllocated()) {
|
||||
cloneDesc.ApplyMold(origDesc, origDesc.rank());
|
||||
stat = ReturnError(
|
||||
terminator, cloneDesc.Allocate(kNoAsyncId), errMsg, hasStat);
|
||||
terminator, cloneDesc.Allocate(kNoAsyncObject), errMsg, hasStat);
|
||||
if (stat == StatOk) {
|
||||
if (const DescriptorAddendum * addendum{cloneDesc.Addendum()}) {
|
||||
if (const typeInfo::DerivedType *
|
||||
@@ -260,7 +260,7 @@ static RT_API_ATTRS void CallFinalSubroutine(const Descriptor &descriptor,
|
||||
copy.raw().attribute = CFI_attribute_allocatable;
|
||||
Terminator stubTerminator{"CallFinalProcedure() in Fortran runtime", 0};
|
||||
RUNTIME_CHECK(terminator ? *terminator : stubTerminator,
|
||||
copy.Allocate(kNoAsyncId) == CFI_SUCCESS);
|
||||
copy.Allocate(kNoAsyncObject) == CFI_SUCCESS);
|
||||
ShallowCopyDiscontiguousToContiguous(copy, descriptor);
|
||||
argDescriptor = ©
|
||||
}
|
||||
|
||||
@@ -158,7 +158,7 @@ RT_API_ATTRS static inline int MapAllocIdx(const Descriptor &desc) {
|
||||
#endif
|
||||
}
|
||||
|
||||
RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) {
|
||||
RT_API_ATTRS int Descriptor::Allocate(std::int64_t *asyncObject) {
|
||||
std::size_t elementBytes{ElementBytes()};
|
||||
if (static_cast<std::int64_t>(elementBytes) < 0) {
|
||||
// F'2023 7.4.4.2 p5: "If the character length parameter value evaluates
|
||||
@@ -170,7 +170,7 @@ RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) {
|
||||
// Zero size allocation is possible in Fortran and the resulting
|
||||
// descriptor must be allocated/associated. Since std::malloc(0)
|
||||
// result is implementation defined, always allocate at least one byte.
|
||||
void *p{alloc(byteSize ? byteSize : 1, asyncId)};
|
||||
void *p{alloc(byteSize ? byteSize : 1, asyncObject)};
|
||||
if (!p) {
|
||||
return CFI_ERROR_MEM_ALLOCATION;
|
||||
}
|
||||
|
||||
@@ -152,7 +152,7 @@ inline RT_API_ATTRS void CharacterMaxOrMinLoc(const char *intrinsic,
|
||||
CFI_attribute_allocatable);
|
||||
result.GetDimension(0).SetBounds(1, extent[0]);
|
||||
Terminator terminator{source, line};
|
||||
if (int stat{result.Allocate(kNoAsyncId)}) {
|
||||
if (int stat{result.Allocate(kNoAsyncObject)}) {
|
||||
terminator.Crash(
|
||||
"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
|
||||
}
|
||||
@@ -181,7 +181,7 @@ inline RT_API_ATTRS void TotalNumericMaxOrMinLoc(const char *intrinsic,
|
||||
CFI_attribute_allocatable);
|
||||
result.GetDimension(0).SetBounds(1, extent[0]);
|
||||
Terminator terminator{source, line};
|
||||
if (int stat{result.Allocate(kNoAsyncId)}) {
|
||||
if (int stat{result.Allocate(kNoAsyncObject)}) {
|
||||
terminator.Crash(
|
||||
"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
|
||||
}
|
||||
|
||||
@@ -220,7 +220,7 @@ void RTDEF(Findloc)(Descriptor &result, const Descriptor &x,
|
||||
CFI_attribute_allocatable);
|
||||
result.GetDimension(0).SetBounds(1, extent[0]);
|
||||
Terminator terminator{source, line};
|
||||
if (int stat{result.Allocate(kNoAsyncId)}) {
|
||||
if (int stat{result.Allocate(kNoAsyncObject)}) {
|
||||
terminator.Crash(
|
||||
"FINDLOC: could not allocate memory for result; STAT=%d", stat);
|
||||
}
|
||||
|
||||
@@ -183,7 +183,7 @@ inline static RT_API_ATTRS void DoMatmulTranspose(
|
||||
for (int j{0}; j < resRank; ++j) {
|
||||
result.GetDimension(j).SetBounds(1, extent[j]);
|
||||
}
|
||||
if (int stat{result.Allocate(kNoAsyncId)}) {
|
||||
if (int stat{result.Allocate(kNoAsyncObject)}) {
|
||||
terminator.Crash(
|
||||
"MATMUL-TRANSPOSE: could not allocate memory for result; STAT=%d",
|
||||
stat);
|
||||
|
||||
@@ -255,7 +255,7 @@ static inline RT_API_ATTRS void DoMatmul(
|
||||
for (int j{0}; j < resRank; ++j) {
|
||||
result.GetDimension(j).SetBounds(1, extent[j]);
|
||||
}
|
||||
if (int stat{result.Allocate(kNoAsyncId)}) {
|
||||
if (int stat{result.Allocate(kNoAsyncObject)}) {
|
||||
terminator.Crash(
|
||||
"MATMUL: could not allocate memory for result; STAT=%d", stat);
|
||||
}
|
||||
|
||||
@@ -30,7 +30,7 @@ static RT_API_ATTRS void TransferImpl(Descriptor &result,
|
||||
if (const DescriptorAddendum * addendum{mold.Addendum()}) {
|
||||
*result.Addendum() = *addendum;
|
||||
}
|
||||
if (int stat{result.Allocate(kNoAsyncId)}) {
|
||||
if (int stat{result.Allocate(kNoAsyncObject)}) {
|
||||
Terminator{sourceFile, line}.Crash(
|
||||
"TRANSFER: could not allocate memory for result; STAT=%d", stat);
|
||||
}
|
||||
|
||||
@@ -129,7 +129,7 @@ RT_API_ATTRS void *AllocateValidatedPointerPayload(
|
||||
byteSize = ((byteSize + align - 1) / align) * align;
|
||||
std::size_t total{byteSize + sizeof(std::uintptr_t)};
|
||||
AllocFct alloc{allocatorRegistry.GetAllocator(allocatorIdx)};
|
||||
void *p{alloc(total, /*asyncId=*/-1)};
|
||||
void *p{alloc(total, /*asyncObject=*/nullptr)};
|
||||
if (p && allocatorIdx == 0) {
|
||||
// Fill the footer word with the XOR of the ones' complement of
|
||||
// the base address, which is a value that would be highly unlikely
|
||||
|
||||
@@ -148,7 +148,7 @@ void DescriptorStorage<COPY_VALUES>::push(const Descriptor &source) {
|
||||
if constexpr (COPY_VALUES) {
|
||||
// copy the data pointed to by the box
|
||||
box.set_base_addr(nullptr);
|
||||
box.Allocate(kNoAsyncId);
|
||||
box.Allocate(kNoAsyncObject);
|
||||
RTNAME(AssignTemporary)
|
||||
(box, source, terminator_.sourceFileName(), terminator_.sourceLine());
|
||||
}
|
||||
|
||||
@@ -261,7 +261,7 @@ RT_API_ATTRS void CreatePartialReductionResult(Descriptor &result,
|
||||
for (int j{0}; j + 1 < xRank; ++j) {
|
||||
result.GetDimension(j).SetBounds(1, resultExtent[j]);
|
||||
}
|
||||
if (int stat{result.Allocate(kNoAsyncId)}) {
|
||||
if (int stat{result.Allocate(kNoAsyncObject)}) {
|
||||
terminator.Crash(
|
||||
"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
|
||||
}
|
||||
|
||||
@@ -132,7 +132,7 @@ static inline RT_API_ATTRS std::size_t AllocateResult(Descriptor &result,
|
||||
for (int j{0}; j < rank; ++j) {
|
||||
result.GetDimension(j).SetBounds(1, extent[j]);
|
||||
}
|
||||
if (int stat{result.Allocate(kNoAsyncId)}) {
|
||||
if (int stat{result.Allocate(kNoAsyncObject)}) {
|
||||
terminator.Crash(
|
||||
"%s: Could not allocate memory for result (stat=%d)", function, stat);
|
||||
}
|
||||
@@ -157,7 +157,7 @@ static inline RT_API_ATTRS std::size_t AllocateBesselResult(Descriptor &result,
|
||||
for (int j{0}; j < rank; ++j) {
|
||||
result.GetDimension(j).SetBounds(1, extent[j]);
|
||||
}
|
||||
if (int stat{result.Allocate(kNoAsyncId)}) {
|
||||
if (int stat{result.Allocate(kNoAsyncObject)}) {
|
||||
terminator.Crash(
|
||||
"%s: Could not allocate memory for result (stat=%d)", function, stat);
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@ int main() {
|
||||
for (int j{0}; j < 3; ++j) {
|
||||
source->GetDimension(j).SetBounds(1, sourceExtent[j]);
|
||||
}
|
||||
TEST(source->Allocate(kNoAsyncId) == CFI_SUCCESS);
|
||||
TEST(source->Allocate(kNoAsyncObject) == CFI_SUCCESS);
|
||||
TEST(source->IsAllocated());
|
||||
MATCH(2, source->GetDimension(0).Extent());
|
||||
MATCH(3, source->GetDimension(1).Extent());
|
||||
|
||||
@@ -26,7 +26,7 @@ TEST(AllocatableTest, MoveAlloc) {
|
||||
auto b{createAllocatable(TypeCategory::Integer, 4)};
|
||||
// ALLOCATE(a(20))
|
||||
a->GetDimension(0).SetBounds(1, 20);
|
||||
a->Allocate(kNoAsyncId);
|
||||
a->Allocate(kNoAsyncObject);
|
||||
|
||||
EXPECT_TRUE(a->IsAllocated());
|
||||
EXPECT_FALSE(b->IsAllocated());
|
||||
@@ -46,7 +46,7 @@ TEST(AllocatableTest, MoveAlloc) {
|
||||
// move_alloc with errMsg
|
||||
auto errMsg{Descriptor::Create(
|
||||
sizeof(char), 64, nullptr, 0, nullptr, CFI_attribute_allocatable)};
|
||||
errMsg->Allocate(kNoAsyncId);
|
||||
errMsg->Allocate(kNoAsyncObject);
|
||||
RTNAME(MoveAlloc)(*b, *a, nullptr, false, errMsg.get(), __FILE__, __LINE__);
|
||||
EXPECT_FALSE(a->IsAllocated());
|
||||
EXPECT_TRUE(b->IsAllocated());
|
||||
|
||||
@@ -42,7 +42,8 @@ TEST(AllocatableCUFTest, SimpleDeviceAllocatable) {
|
||||
CUDA_REPORT_IF_ERROR(cudaMalloc(&device_desc, a->SizeInBytes()));
|
||||
|
||||
RTNAME(AllocatableAllocate)
|
||||
(*a, kNoAsyncId, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
|
||||
(*a, kNoAsyncObject, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
|
||||
__LINE__);
|
||||
EXPECT_TRUE(a->IsAllocated());
|
||||
RTNAME(CUFDescriptorSync)(device_desc, a.get(), __FILE__, __LINE__);
|
||||
cudaDeviceSynchronize();
|
||||
@@ -82,19 +83,22 @@ TEST(AllocatableCUFTest, StreamDeviceAllocatable) {
|
||||
RTNAME(AllocatableSetBounds)(*c, 0, 1, 100);
|
||||
|
||||
RTNAME(AllocatableAllocate)
|
||||
(*a, 1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
|
||||
(*a, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
|
||||
__LINE__);
|
||||
EXPECT_TRUE(a->IsAllocated());
|
||||
cudaDeviceSynchronize();
|
||||
EXPECT_EQ(cudaSuccess, cudaGetLastError());
|
||||
|
||||
RTNAME(AllocatableAllocate)
|
||||
(*b, 1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
|
||||
(*b, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
|
||||
__LINE__);
|
||||
EXPECT_TRUE(b->IsAllocated());
|
||||
cudaDeviceSynchronize();
|
||||
EXPECT_EQ(cudaSuccess, cudaGetLastError());
|
||||
|
||||
RTNAME(AllocatableAllocate)
|
||||
(*c, 1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
|
||||
(*c, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
|
||||
__LINE__);
|
||||
EXPECT_TRUE(c->IsAllocated());
|
||||
cudaDeviceSynchronize();
|
||||
EXPECT_EQ(cudaSuccess, cudaGetLastError());
|
||||
|
||||
@@ -35,7 +35,7 @@ TEST(AllocatableCUFTest, SimpleDeviceAllocate) {
|
||||
EXPECT_FALSE(a->HasAddendum());
|
||||
RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
|
||||
RTNAME(AllocatableAllocate)
|
||||
(*a, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
|
||||
(*a, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
|
||||
__LINE__);
|
||||
EXPECT_TRUE(a->IsAllocated());
|
||||
RTNAME(AllocatableDeallocate)
|
||||
@@ -54,7 +54,7 @@ TEST(AllocatableCUFTest, SimplePinnedAllocate) {
|
||||
EXPECT_FALSE(a->HasAddendum());
|
||||
RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
|
||||
RTNAME(AllocatableAllocate)
|
||||
(*a, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
|
||||
(*a, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
|
||||
__LINE__);
|
||||
EXPECT_TRUE(a->IsAllocated());
|
||||
RTNAME(AllocatableDeallocate)
|
||||
|
||||
@@ -50,8 +50,8 @@ TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
|
||||
EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx());
|
||||
RTNAME(AllocatableSetBounds)(*dev, 0, 1, 10);
|
||||
RTNAME(AllocatableAllocate)
|
||||
(*dev, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__,
|
||||
__LINE__);
|
||||
(*dev, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr,
|
||||
__FILE__, __LINE__);
|
||||
EXPECT_TRUE(dev->IsAllocated());
|
||||
|
||||
// Create temp array to transfer to device.
|
||||
|
||||
@@ -35,7 +35,7 @@ OwningPtr<Descriptor> CreateDescriptor(const std::vector<SubscriptValue> &shape,
|
||||
for (int j{0}; j < rank; ++j) {
|
||||
descriptor->GetDimension(j).SetBounds(2, shape[j] + 1);
|
||||
}
|
||||
if (descriptor->Allocate(kNoAsyncId) != 0) {
|
||||
if (descriptor->Allocate(kNoAsyncObject) != 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ template <std::size_t n = 64>
|
||||
static OwningPtr<Descriptor> CreateEmptyCharDescriptor() {
|
||||
OwningPtr<Descriptor> descriptor{Descriptor::Create(
|
||||
sizeof(char), n, nullptr, 0, nullptr, CFI_attribute_allocatable)};
|
||||
if (descriptor->Allocate(kNoAsyncId) != 0) {
|
||||
if (descriptor->Allocate(kNoAsyncObject) != 0) {
|
||||
return nullptr;
|
||||
}
|
||||
return descriptor;
|
||||
@@ -36,7 +36,7 @@ static OwningPtr<Descriptor> CharDescriptor(const char *value) {
|
||||
std::size_t n{std::strlen(value)};
|
||||
OwningPtr<Descriptor> descriptor{Descriptor::Create(
|
||||
sizeof(char), n, nullptr, 0, nullptr, CFI_attribute_allocatable)};
|
||||
if (descriptor->Allocate(kNoAsyncId) != 0) {
|
||||
if (descriptor->Allocate(kNoAsyncObject) != 0) {
|
||||
return nullptr;
|
||||
}
|
||||
std::memcpy(descriptor->OffsetElement(), value, n);
|
||||
@@ -47,7 +47,7 @@ template <int kind = sizeof(std::int64_t)>
|
||||
static OwningPtr<Descriptor> EmptyIntDescriptor() {
|
||||
OwningPtr<Descriptor> descriptor{Descriptor::Create(TypeCategory::Integer,
|
||||
kind, nullptr, 0, nullptr, CFI_attribute_allocatable)};
|
||||
if (descriptor->Allocate(kNoAsyncId) != 0) {
|
||||
if (descriptor->Allocate(kNoAsyncObject) != 0) {
|
||||
return nullptr;
|
||||
}
|
||||
return descriptor;
|
||||
@@ -57,7 +57,7 @@ template <int kind = sizeof(std::int64_t)>
|
||||
static OwningPtr<Descriptor> IntDescriptor(const int &value) {
|
||||
OwningPtr<Descriptor> descriptor{Descriptor::Create(TypeCategory::Integer,
|
||||
kind, nullptr, 0, nullptr, CFI_attribute_allocatable)};
|
||||
if (descriptor->Allocate(kNoAsyncId) != 0) {
|
||||
if (descriptor->Allocate(kNoAsyncObject) != 0) {
|
||||
return nullptr;
|
||||
}
|
||||
std::memcpy(descriptor->OffsetElement<int>(), &value, sizeof(int));
|
||||
|
||||
@@ -59,7 +59,7 @@ TEST(TemporaryStack, ValueStackBasic) {
|
||||
Descriptor &outputDesc2{testDescriptorStorage[2].descriptor()};
|
||||
inputDesc.Establish(code, elementBytes, descriptorPtr, rank, extent);
|
||||
|
||||
inputDesc.Allocate(kNoAsyncId);
|
||||
inputDesc.Allocate(kNoAsyncObject);
|
||||
ASSERT_EQ(inputDesc.IsAllocated(), true);
|
||||
uint32_t *inputData = static_cast<uint32_t *>(inputDesc.raw().base_addr);
|
||||
for (std::size_t i = 0; i < inputDesc.Elements(); ++i) {
|
||||
@@ -123,7 +123,7 @@ TEST(TemporaryStack, ValueStackMultiSize) {
|
||||
boxDims.extent = extent[dim];
|
||||
boxDims.sm = elementBytes;
|
||||
}
|
||||
desc->Allocate(kNoAsyncId);
|
||||
desc->Allocate(kNoAsyncObject);
|
||||
|
||||
// fill the array with some data to test
|
||||
for (uint32_t i = 0; i < desc->Elements(); ++i) {
|
||||
|
||||
@@ -42,7 +42,7 @@ static OwningPtr<Descriptor> MakeArray(const std::vector<int> &shape,
|
||||
for (int j{0}; j < rank; ++j) {
|
||||
result->GetDimension(j).SetBounds(1, shape[j]);
|
||||
}
|
||||
int stat{result->Allocate(kNoAsyncId)};
|
||||
int stat{result->Allocate(kNoAsyncObject)};
|
||||
EXPECT_EQ(stat, 0) << stat;
|
||||
EXPECT_LE(data.size(), result->Elements());
|
||||
char *p{result->OffsetElement<char>()};
|
||||
|
||||
Reference in New Issue
Block a user