This will allow code sharing between reduction and privatization after my (still WIP) changes to `omp.private` to use an `alloc` region similar to the one used for reduction declarations.
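For context, a caller on the reduction side might wire the shared helper up roughly as follows. This is a minimal sketch only: `genInitRegionFor` and the block-argument layout (mold value first, allocated private copy second) are illustrative assumptions, not part of this patch.

```cpp
// Hypothetical caller sketch (not part of this patch): lowering code for a
// by-ref reduction or privatizer populating its init and cleanup regions
// through the shared helper.
static void genInitRegionFor(fir::FirOpBuilder &builder, mlir::Location loc,
                             mlir::Type argType, mlir::Value scalarInitValue,
                             mlir::Block *initBlock,
                             mlir::Region &cleanupRegion) {
  // Assumed block-argument layout: mold value first, then the
  // already-allocated private copy.
  mlir::Value moldArg = initBlock->getArgument(0);
  mlir::Value allocatedPrivVarArg = initBlock->getArgument(1);
  Fortran::lower::omp::populateByRefInitAndCleanupRegions(
      builder, loc, argType, scalarInitValue, initBlock, allocatedPrivVarArg,
      moldArg, cleanupRegion);
}
```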
//===-- PrivateReductionUtils.cpp -------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
//
//===----------------------------------------------------------------------===//

#include "PrivateReductionUtils.h"
|
|
|
|
#include "flang/Optimizer/Builder/FIRBuilder.h"
|
|
#include "flang/Optimizer/Builder/HLFIRTools.h"
|
|
#include "flang/Optimizer/Builder/Todo.h"
|
|
#include "flang/Optimizer/HLFIR/HLFIROps.h"
|
|
#include "flang/Optimizer/Support/FatalError.h"
|
|
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
|
|
#include "mlir/IR/Location.h"
|
|
|
|
static void createCleanupRegion(fir::FirOpBuilder &builder, mlir::Location loc,
                                mlir::Type argType,
                                mlir::Region &cleanupRegion) {
  assert(cleanupRegion.empty());
  mlir::Block *block = builder.createBlock(&cleanupRegion, cleanupRegion.end(),
                                           {argType}, {loc});
  builder.setInsertionPointToEnd(block);

  auto typeError = [loc]() {
    fir::emitFatalError(loc,
                        "Attempt to create an omp cleanup region "
                        "for a type that wasn't allocated",
                        /*genCrashDiag=*/true);
  };

  mlir::Type valTy = fir::unwrapRefType(argType);
  if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(valTy)) {
    if (!mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy())) {
      mlir::Type innerTy = fir::extractSequenceType(boxTy);
      if (!mlir::isa<fir::SequenceType>(innerTy))
        typeError();
    }

    mlir::Value arg = builder.loadIfRef(loc, block->getArgument(0));
    assert(mlir::isa<fir::BaseBoxType>(arg.getType()));

    // Deallocate the box.
    // The FIR type system doesn't necessarily know that this is a mutable box
    // if we allocated the thread local array on the heap to avoid looped stack
    // allocations.
    mlir::Value addr =
        hlfir::genVariableRawAddress(loc, builder, hlfir::Entity{arg});
    mlir::Value isAllocated = builder.genIsNotNullAddr(loc, addr);
    fir::IfOp ifOp =
        builder.create<fir::IfOp>(loc, isAllocated, /*withElseRegion=*/false);
    builder.setInsertionPointToStart(&ifOp.getThenRegion().front());

    mlir::Value cast = builder.createConvert(
        loc, fir::HeapType::get(fir::dyn_cast_ptrEleTy(addr.getType())), addr);
    builder.create<fir::FreeMemOp>(loc, cast);

    builder.setInsertionPointAfter(ifOp);
    builder.create<mlir::omp::YieldOp>(loc);
    return;
  }

  typeError();
}

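// For illustration, for a heap-allocated array temporary the cleanup region
// built above amounts to roughly the following FIR (a sketch only; exact
// types depend on the variable being reduced/privatized):
//
//   ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
//     %box = fir.load %arg0
//     %addr = fir.box_addr %box
//     fir.if (%addr != null) {
//       fir.freemem %addr
//     }
//     omp.yield
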
fir::ShapeShiftOp Fortran::lower::omp::getShapeShift(fir::FirOpBuilder &builder,
                                                     mlir::Location loc,
                                                     mlir::Value box) {
  fir::SequenceType sequenceType = mlir::cast<fir::SequenceType>(
      hlfir::getFortranElementOrSequenceType(box.getType()));
  const unsigned rank = sequenceType.getDimension();
  llvm::SmallVector<mlir::Value> lbAndExtents;
  lbAndExtents.reserve(rank * 2);

  mlir::Type idxTy = builder.getIndexType();
  for (unsigned i = 0; i < rank; ++i) {
    // TODO: ideally we want to hoist box reads out of the critical section.
    // We could do this by having box dimensions in block arguments like
    // OpenACC does
    mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
    auto dimInfo =
        builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box, dim);
    lbAndExtents.push_back(dimInfo.getLowerBound());
    lbAndExtents.push_back(dimInfo.getExtent());
  }

  auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), rank);
  auto shapeShift =
      builder.create<fir::ShapeShiftOp>(loc, shapeShiftTy, lbAndExtents);
  return shapeShift;
}

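// For example, for a rank-1 box with non-default lower bounds, getShapeShift
// produces roughly the following (sketch only; the lower bound and extent
// come from fir.box_dims):
//
//   %c0 = arith.constant 0 : index
//   %lb, %extent, %stride = fir.box_dims %box, %c0
//   %ss = fir.shape_shift %lb, %extent : (index, index) -> !fir.shapeshift<1>
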
void Fortran::lower::omp::populateByRefInitAndCleanupRegions(
    fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type argType,
    mlir::Value scalarInitValue, mlir::Block *initBlock,
    mlir::Value allocatedPrivVarArg, mlir::Value moldArg,
    mlir::Region &cleanupRegion) {
  mlir::Type ty = fir::unwrapRefType(argType);
  builder.setInsertionPointToEnd(initBlock);
  auto yield = [&](mlir::Value ret) {
    builder.create<mlir::omp::YieldOp>(loc, ret);
  };

  if (fir::isa_trivial(ty)) {
    builder.setInsertionPointToEnd(initBlock);

    if (scalarInitValue)
      builder.createStoreWithConvert(loc, scalarInitValue,
                                     allocatedPrivVarArg);
    yield(allocatedPrivVarArg);
    return;
  }

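  // For a trivial type such as i32 with a scalar init value (e.g. zero for a
  // sum reduction), the init region built above is roughly just (sketch):
  //   fir.store %init to %priv_var : !fir.ref<i32>
  //   omp.yield(%priv_var : !fir.ref<i32>)
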
  // Check if an allocatable box is unallocated. If so, initialize the
  // boxAlloca to be unallocated e.g.
  // %box_alloca = fir.alloca !fir.box<!fir.heap<...>>
  // %addr = fir.box_addr %box
  // if (%addr == 0) {
  //   %nullbox = fir.embox %addr
  //   fir.store %nullbox to %box_alloca
  // } else {
  //   // ...
  //   fir.store %something to %box_alloca
  // }
  // omp.yield %box_alloca
  moldArg = builder.loadIfRef(loc, moldArg);
  auto handleNullAllocatable = [&](mlir::Value boxAlloca) -> fir::IfOp {
    mlir::Value addr = builder.create<fir::BoxAddrOp>(loc, moldArg);
    mlir::Value isNotAllocated = builder.genIsNullAddr(loc, addr);
    fir::IfOp ifOp = builder.create<fir::IfOp>(loc, isNotAllocated,
                                               /*withElseRegion=*/true);
    builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
    // Just embox the null address and return.
    mlir::Value nullBox = builder.create<fir::EmboxOp>(loc, ty, addr);
    builder.create<fir::StoreOp>(loc, nullBox, boxAlloca);
    return ifOp;
  };

  // All arrays are boxed.
  if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) {
    bool isAllocatableOrPointer =
        mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy());

    builder.setInsertionPointToEnd(initBlock);
    mlir::Value boxAlloca = allocatedPrivVarArg;
    mlir::Type innerTy = fir::unwrapRefType(boxTy.getEleTy());
    if (fir::isa_trivial(innerTy)) {
      // Boxed non-sequence value e.g. !fir.box<!fir.heap<i32>>.
      if (!isAllocatableOrPointer)
        TODO(loc,
             "Reduction/Privatization of non-allocatable trivial typed box");

      fir::IfOp ifUnallocated = handleNullAllocatable(boxAlloca);

      builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front());
      mlir::Value valAlloc = builder.create<fir::AllocMemOp>(loc, innerTy);
      if (scalarInitValue)
        builder.createStoreWithConvert(loc, scalarInitValue, valAlloc);
      mlir::Value box = builder.create<fir::EmboxOp>(loc, ty, valAlloc);
      builder.create<fir::StoreOp>(loc, box, boxAlloca);

      createCleanupRegion(builder, loc, argType, cleanupRegion);
      builder.setInsertionPointAfter(ifUnallocated);
      yield(boxAlloca);
      return;
    }
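    // For reference, for a mold of type !fir.box<!fir.heap<i32>> the branch
    // above emits roughly (sketch only):
    //   fir.if %mold_is_unallocated {
    //     store an embox of the null address to %box_alloca
    //   } else {
    //     %mem = fir.allocmem i32
    //     store the scalar init value, embox %mem, store the box to
    //     %box_alloca
    //   }
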
    innerTy = fir::extractSequenceType(boxTy);
    if (!mlir::isa<fir::SequenceType>(innerTy))
      TODO(loc, "Unsupported boxed type for reduction/privatization");

    fir::IfOp ifUnallocated{nullptr};
    if (isAllocatableOrPointer) {
      ifUnallocated = handleNullAllocatable(boxAlloca);
      builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front());
    }

    // Create the private copy from the initial fir.box:
    mlir::Value loadedBox = builder.loadIfRef(loc, moldArg);
    hlfir::Entity source = hlfir::Entity{loadedBox};

    // Allocating on the heap in case the whole reduction is nested inside of
    // a loop.
    // TODO: compare performance here to using allocas - this could be made to
    // work by inserting stacksave/stackrestore around the reduction in
    // OpenMPIRBuilder.
    auto [temp, needsDealloc] = createTempFromMold(loc, builder, source);
    // If needsDealloc isn't statically false, add a cleanup region. Always do
    // this for allocatable boxes because they might have been re-allocated in
    // the body of the loop/parallel region.

    std::optional<int64_t> cstNeedsDealloc =
        fir::getIntIfConstant(needsDealloc);
    assert(cstNeedsDealloc.has_value() &&
           "createTempFromMold decides this statically");
    if (cstNeedsDealloc.has_value() && *cstNeedsDealloc != 0) {
      mlir::OpBuilder::InsertionGuard guard(builder);
      createCleanupRegion(builder, loc, argType, cleanupRegion);
    } else {
      assert(!isAllocatableOrPointer &&
             "Pointer-like arrays must be heap allocated");
    }

    // Put the temporary inside of a box:
    // hlfir::genVariableBox doesn't handle non-default lower bounds.
    mlir::Value box;
    fir::ShapeShiftOp shapeShift = getShapeShift(builder, loc, loadedBox);
    mlir::Type boxType = loadedBox.getType();
    if (mlir::isa<fir::BaseBoxType>(temp.getType()))
      // The box created by the declare from createTempFromMold is missing
      // lower bounds info.
      box = builder.create<fir::ReboxOp>(loc, boxType, temp, shapeShift,
                                         /*shift=*/mlir::Value{});
    else
      box = builder.create<fir::EmboxOp>(
          loc, boxType, temp, shapeShift,
          /*slice=*/mlir::Value{},
          /*typeParams=*/llvm::ArrayRef<mlir::Value>{});

    if (scalarInitValue)
      builder.create<hlfir::AssignOp>(loc, scalarInitValue, box);
    builder.create<fir::StoreOp>(loc, box, boxAlloca);
    if (ifUnallocated)
      builder.setInsertionPointAfter(ifUnallocated);
    yield(boxAlloca);
    return;
  }

  TODO(loc,
       "creating reduction/privatization init region for unsupported type");
}