[flang] Code generation for fir.pack/unpack_array. (#132080)

The code generation relies on the `ShallowCopyDirect` runtime entry
to copy data between the original and the temporary arrays
(in both directions). The allocations are done by compiler-generated
code. The heap allocations could have been delegated
to the `ShallowCopy` runtime, but I decided to expose the allocations
so that the temporary descriptor passed to `ShallowCopyDirect`
has `nocapture` - maybe this will be better for LLVM optimizations.
Author: Slava Zakharin
Date: 2025-03-31 11:42:17 -07:00
Committed by: GitHub
Parent: 0ac8cb1b3d
Commit: 5f268d04f9
15 changed files with 1664 additions and 78 deletions
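For orientation, both copy directions are emitted through the `fir::runtime::genShallowCopy` builder helper used in LowerRepackArrays.cpp below, which ends up calling the `ShallowCopyDirect` runtime entry. A minimal sketch; the wrapper function and the box names are illustrative only:

// Sketch only: pack copies the original array into the compiler-allocated
// temporary; unpack copies it back. Either copy is skipped when no_copy is set.
static void copyForRepack(fir::FirOpBuilder &builder, mlir::Location loc,
                          mlir::Value originalBox, mlir::Value tempBox) {
  // Pack direction: original -> temporary.
  fir::runtime::genShallowCopy(builder, loc, tempBox, originalBox,
                               /*resultIsAllocated=*/true);
  // Unpack direction: temporary -> original.
  fir::runtime::genShallowCopy(builder, loc, originalBox, tempBox,
                               /*resultIsAllocated=*/true);
}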


@@ -268,6 +268,40 @@ public:
mlir::ValueRange lenParams = {},
llvm::ArrayRef<mlir::NamedAttribute> attrs = {});
/// Sample genDeclare callback for createArrayTemp() below.
/// It creates fir.declare operation using the given operands.
/// \p memref is the base of the allocated temporary,
/// which may be !fir.ref<!fir.array<>> or !fir.ref<!fir.box/class<>>.
static mlir::Value genTempDeclareOp(fir::FirOpBuilder &builder,
mlir::Location loc, mlir::Value memref,
llvm::StringRef name, mlir::Value shape,
llvm::ArrayRef<mlir::Value> typeParams,
fir::FortranVariableFlagsAttr attrs);
/// Create a temporary array with the given \p arrayType,
/// \p shape, \p extents and \p typeParams. An optional
/// \p polymorphicMold specifies the entity whose dynamic type
/// has to be used for the allocation.
/// \p genDeclare callback generates a declare operation
/// for the created temporary. FIR passes may use genTempDeclareOp()
/// function above that creates fir.declare.
/// HLFIR passes may provide their own callback that generates
/// hlfir.declare. Some passes may provide a callback that
/// just passes through the base of the temporary.
/// If \p useStack is true, the function will try to do the allocation
/// in stack memory (which is not always possible currently).
/// The first return value is the base of the temporary object,
/// which may be !fir.ref<!fir.array<>> or !fir.ref<!fir.box/class<>>.
/// The second return value is true if the actual allocation
/// was done in heap memory.
std::pair<mlir::Value, bool>
createArrayTemp(mlir::Location loc, fir::SequenceType arrayType,
mlir::Value shape, llvm::ArrayRef<mlir::Value> extents,
llvm::ArrayRef<mlir::Value> typeParams,
const std::function<decltype(genTempDeclareOp)> &genDeclare,
mlir::Value polymorphicMold, bool useStack = false,
llvm::StringRef tmpName = ".tmp.array");
/// Create an LLVM stack save intrinsic op. Returns the saved stack pointer.
/// The stack address space is fetched from the data layout of the current
/// module.
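A usage sketch for the new `createArrayTemp` entry point declared above, requesting a stack allocation for a non-polymorphic temporary. The wrapper function and its name are illustrative; the calls themselves use only the APIs declared in this header:

// Sketch: create a contiguous temporary for an array with known extents.
static mlir::Value makeContiguousTemp(fir::FirOpBuilder &builder,
                                      mlir::Location loc,
                                      fir::SequenceType arrayType,
                                      llvm::ArrayRef<mlir::Value> extents,
                                      llvm::ArrayRef<mlir::Value> typeParams) {
  mlir::Value shape = builder.genShape(loc, extents);
  auto [base, isHeapAllocation] = builder.createArrayTemp(
      loc, arrayType, shape, extents, typeParams,
      fir::FirOpBuilder::genTempDeclareOp,
      /*polymorphicMold=*/nullptr, /*useStack=*/true);
  // Per the documentation above, a stack request is not always honored,
  // so callers consult isHeapAllocation to decide whether a fir.freemem
  // is needed later.
  (void)isHeapAllocation;
  return base;
}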
@@ -596,6 +630,15 @@ public:
return result;
}
/// Compare two pointer-like values using the given predicate.
mlir::Value genPtrCompare(mlir::Location loc,
mlir::arith::CmpIPredicate predicate,
mlir::Value ptr1, mlir::Value ptr2) {
ptr1 = createConvert(loc, getIndexType(), ptr1);
ptr2 = createConvert(loc, getIndexType(), ptr2);
return create<mlir::arith::CmpIOp>(loc, predicate, ptr1, ptr2);
}
private:
/// Set attributes (e.g. FastMathAttr) to \p op operation
/// based on the current attributes setting.
@@ -850,6 +893,17 @@ llvm::SmallVector<mlir::Value> deduceOptimalExtents(mlir::ValueRange extents1,
/// %result1 = arith.select %p4, %c0, %e1 : index
llvm::SmallVector<mlir::Value> updateRuntimeExtentsForEmptyArrays(
fir::FirOpBuilder &builder, mlir::Location loc, mlir::ValueRange extents);
/// Given \p box of type fir::BaseBoxType representing an array,
/// the function generates code to fetch the lower bounds,
/// the extents and the strides from the box. The values are returned via
/// \p lbounds, \p extents and \p strides.
void genDimInfoFromBox(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value box,
llvm::SmallVectorImpl<mlir::Value> *lbounds,
llvm::SmallVectorImpl<mlir::Value> *extents,
llvm::SmallVectorImpl<mlir::Value> *strides);
} // namespace fir::factory
#endif // FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H
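A usage sketch for `genDimInfoFromBox`: any of the output vectors may be null, as in the call sites updated below that only need the lower bounds or the extents. The wrapper name is illustrative:

// Sketch: fetch only the extents of a boxed array; pass nullptr for
// dimension information that is not needed.
static llvm::SmallVector<mlir::Value>
getBoxExtents(fir::FirOpBuilder &builder, mlir::Location loc,
              mlir::Value box) {
  llvm::SmallVector<mlir::Value> extents;
  fir::factory::genDimInfoFromBox(builder, loc, box, /*lbounds=*/nullptr,
                                  &extents, /*strides=*/nullptr);
  return extents;
}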


@@ -99,4 +99,15 @@ def BoxedProcedurePass : Pass<"boxed-procedure", "mlir::ModuleOp"> {
];
}
def LowerRepackArraysPass : Pass<"lower-repack-arrays", "mlir::ModuleOp"> {
let summary = "Convert fir.pack/unpack_array to other FIR operations";
let description = [{
Convert fir.pack/unpack_array operations to other FIR operations
and Fortran runtime calls that implement the semantics
of packing/unpacking.
}];
let dependentDialects = ["fir::FIROpsDialect", "mlir::arith::ArithDialect",
"mlir::func::FuncDialect"];
}
#endif // FORTRAN_OPTIMIZER_CODEGEN_FIR_PASSES
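The pass is scheduled with its generated constructor, as the default-pipeline change later in this commit does; running it standalone through fir-opt with the `lower-repack-arrays` flag registered above should also work, though that invocation is an assumption and not shown in this patch:

// Sketch: add the new pass to an mlir::PassManager named pm.
pm.addPass(fir::createLowerRepackArraysPass());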


@@ -26,6 +26,7 @@ struct NameUniquer;
#define GEN_PASS_DECL_CODEGENREWRITE
#define GEN_PASS_DECL_TARGETREWRITEPASS
#define GEN_PASS_DECL_BOXEDPROCEDUREPASS
#define GEN_PASS_DECL_LOWERREPACKARRAYSPASS
#include "flang/Optimizer/CodeGen/CGPasses.h.inc"
/// FIR to LLVM translation pass options.


@@ -11,6 +11,7 @@
#include "flang/Optimizer/Builder/Character.h"
#include "flang/Optimizer/Builder/Complex.h"
#include "flang/Optimizer/Builder/MutableBox.h"
#include "flang/Optimizer/Builder/Runtime/Allocatable.h"
#include "flang/Optimizer/Builder/Runtime/Assign.h"
#include "flang/Optimizer/Builder/Runtime/Derived.h"
#include "flang/Optimizer/Builder/Todo.h"
@@ -362,6 +363,72 @@ mlir::Value fir::FirOpBuilder::createHeapTemporary(
name, dynamicLength, dynamicShape, attrs);
}
std::pair<mlir::Value, bool> fir::FirOpBuilder::createArrayTemp(
mlir::Location loc, fir::SequenceType arrayType, mlir::Value shape,
llvm::ArrayRef<mlir::Value> extents, llvm::ArrayRef<mlir::Value> typeParams,
const std::function<decltype(FirOpBuilder::genTempDeclareOp)> &genDeclare,
mlir::Value polymorphicMold, bool useStack, llvm::StringRef tmpName) {
if (polymorphicMold) {
// Create *allocated* polymorphic temporary using the dynamic type
// of the mold and the provided shape/extents. The created temporary
// array will be written element per element, that is why it has to be
// allocated.
mlir::Type boxHeapType = fir::HeapType::get(arrayType);
mlir::Value alloc = fir::factory::genNullBoxStorage(
*this, loc, fir::ClassType::get(boxHeapType));
fir::FortranVariableFlagsAttr declAttrs =
fir::FortranVariableFlagsAttr::get(
getContext(), fir::FortranVariableFlagsEnum::allocatable);
mlir::Value base = genDeclare(*this, loc, alloc, tmpName,
/*shape=*/nullptr, typeParams, declAttrs);
int rank = extents.size();
fir::runtime::genAllocatableApplyMold(*this, loc, alloc, polymorphicMold,
rank);
if (!extents.empty()) {
mlir::Type idxTy = getIndexType();
mlir::Value one = createIntegerConstant(loc, idxTy, 1);
unsigned dim = 0;
for (mlir::Value extent : extents) {
mlir::Value dimIndex = createIntegerConstant(loc, idxTy, dim++);
fir::runtime::genAllocatableSetBounds(*this, loc, alloc, dimIndex, one,
extent);
}
}
if (!typeParams.empty()) {
// We should call AllocatableSetDerivedLength() here.
// TODO: does the mold provide the length parameters or
// the operation itself or should they be in sync?
TODO(loc, "polymorphic type with length parameters");
}
fir::runtime::genAllocatableAllocate(*this, loc, alloc);
return {base, /*isHeapAllocation=*/true};
}
mlir::Value allocmem;
if (useStack)
allocmem = createTemporary(loc, arrayType, tmpName, extents, typeParams);
else
allocmem =
createHeapTemporary(loc, arrayType, tmpName, extents, typeParams);
mlir::Value base = genDeclare(*this, loc, allocmem, tmpName, shape,
typeParams, fir::FortranVariableFlagsAttr{});
return {base, !useStack};
}
mlir::Value fir::FirOpBuilder::genTempDeclareOp(
fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value memref,
llvm::StringRef name, mlir::Value shape,
llvm::ArrayRef<mlir::Value> typeParams,
fir::FortranVariableFlagsAttr fortranAttrs) {
auto nameAttr = mlir::StringAttr::get(builder.getContext(), name);
return builder.create<fir::DeclareOp>(loc, memref.getType(), memref, shape,
typeParams,
/*dummy_scope=*/nullptr, nameAttr,
fortranAttrs, cuf::DataAttributeAttr{});
}
mlir::Value fir::FirOpBuilder::genStackSave(mlir::Location loc) {
mlir::Type voidPtr = mlir::LLVM::LLVMPointerType::get(
getContext(), fir::factory::getAllocaAddressSpace(&getDataLayout()));
@@ -1825,3 +1892,29 @@ llvm::SmallVector<mlir::Value> fir::factory::updateRuntimeExtentsForEmptyArrays(
}
return newExtents;
}
void fir::factory::genDimInfoFromBox(
fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value box,
llvm::SmallVectorImpl<mlir::Value> *lbounds,
llvm::SmallVectorImpl<mlir::Value> *extents,
llvm::SmallVectorImpl<mlir::Value> *strides) {
auto boxType = mlir::dyn_cast<fir::BaseBoxType>(box.getType());
assert(boxType && "must be a box");
if (!lbounds && !extents && !strides)
return;
unsigned rank = fir::getBoxRank(boxType);
assert(rank != 0 && "must be an array of known rank");
mlir::Type idxTy = builder.getIndexType();
for (unsigned i = 0; i < rank; ++i) {
mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
auto dimInfo =
builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box, dim);
if (lbounds)
lbounds->push_back(dimInfo.getLowerBound());
if (extents)
extents->push_back(dimInfo.getExtent());
if (strides)
strides->push_back(dimInfo.getByteStride());
}
}


@@ -95,24 +95,6 @@ getExplicitLbounds(fir::FortranVariableOpInterface var) {
return {};
}
static void
genLboundsAndExtentsFromBox(mlir::Location loc, fir::FirOpBuilder &builder,
hlfir::Entity boxEntity,
llvm::SmallVectorImpl<mlir::Value> &lbounds,
llvm::SmallVectorImpl<mlir::Value> *extents) {
assert(mlir::isa<fir::BaseBoxType>(boxEntity.getType()) && "must be a box");
mlir::Type idxTy = builder.getIndexType();
const int rank = boxEntity.getRank();
for (int i = 0; i < rank; ++i) {
mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
auto dimInfo = builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy,
boxEntity, dim);
lbounds.push_back(dimInfo.getLowerBound());
if (extents)
extents->push_back(dimInfo.getExtent());
}
}
static llvm::SmallVector<mlir::Value>
getNonDefaultLowerBounds(mlir::Location loc, fir::FirOpBuilder &builder,
hlfir::Entity entity) {
@@ -128,8 +110,8 @@ getNonDefaultLowerBounds(mlir::Location loc, fir::FirOpBuilder &builder,
if (entity.isMutableBox())
entity = hlfir::derefPointersAndAllocatables(loc, builder, entity);
llvm::SmallVector<mlir::Value> lowerBounds;
genLboundsAndExtentsFromBox(loc, builder, entity, lowerBounds,
/*extents=*/nullptr);
fir::factory::genDimInfoFromBox(builder, loc, entity, &lowerBounds,
/*extents=*/nullptr, /*strides=*/nullptr);
return lowerBounds;
}
@@ -1149,8 +1131,8 @@ static fir::ExtendedValue translateVariableToExtendedValue(
variable.mayHaveNonDefaultLowerBounds()) {
// This special case avoids generating two sets of identical
// fir.box_dim to get both the lower bounds and extents.
genLboundsAndExtentsFromBox(loc, builder, variable, nonDefaultLbounds,
&extents);
fir::factory::genDimInfoFromBox(builder, loc, variable, &nonDefaultLbounds,
&extents, /*strides=*/nullptr);
} else {
extents = getVariableExtents(loc, builder, variable);
nonDefaultLbounds = getNonDefaultLowerBounds(loc, builder, variable);


@@ -4,6 +4,7 @@ add_flang_library(FIRCodeGen
CodeGen.cpp
CodeGenOpenMP.cpp
FIROpPatterns.cpp
LowerRepackArrays.cpp
PreCGRewrite.cpp
TBAABuilder.cpp
Target.cpp


@@ -0,0 +1,330 @@
//===-- LowerRepackArrays.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass expands fir.pack_array and fir.unpack_array operations
/// into sequences of other FIR operations and Fortran runtime calls.
/// This pass uses structured control flow FIR operations such
/// as fir.if, so its placement in the pipeline should guarantee
/// further lowering of these operations.
///
/// A fir.pack_array operation is converted into a sequence of checks
/// identifying whether an array needs to be copied into a contiguous
/// temporary. When the checks pass, a new memory allocation is done
/// for the temporary array (in either stack or heap memory).
/// If `fir.pack_array` does not have the no_copy attribute, then
/// the original array is shallow-copied into the temporary.
///
/// A fir.unpack_array operation is converted into a check
/// of whether the original and the temporary arrays occupy different
/// memory. When the check passes, the temporary array is
/// shallow-copied into the original array (unless the no_copy attribute
/// is set), and then the temporary array is deallocated (if it was
/// allocated in stack memory, there is no explicit deallocation).
//===----------------------------------------------------------------------===//
#include "flang/Optimizer/CodeGen/CodeGen.h"
#include "flang/Optimizer/Builder/Character.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/MutableBox.h"
#include "flang/Optimizer/Builder/Runtime/Allocatable.h"
#include "flang/Optimizer/Builder/Runtime/Transformational.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
namespace fir {
#define GEN_PASS_DEF_LOWERREPACKARRAYSPASS
#include "flang/Optimizer/CodeGen/CGPasses.h.inc"
} // namespace fir
#define DEBUG_TYPE "lower-repack-arrays"
namespace {
class PackArrayConversion : public mlir::OpRewritePattern<fir::PackArrayOp> {
public:
using OpRewritePattern::OpRewritePattern;
mlir::LogicalResult
matchAndRewrite(fir::PackArrayOp op,
mlir::PatternRewriter &rewriter) const override;
private:
static constexpr llvm::StringRef bufferName = ".repacked";
// Return a value of fir::BaseBoxType that represents a temporary
// array created for the original box with given extents and
// type parameters. The new box has the default lower bounds.
// If useStack is true, then the temporary will be allocated
// in stack memory (when possible).
static mlir::Value allocateTempBuffer(fir::FirOpBuilder &builder,
mlir::Location loc, bool useStack,
mlir::Value origBox,
llvm::ArrayRef<mlir::Value> extents,
llvm::ArrayRef<mlir::Value> typeParams);
// Generate a value of fir::BaseBoxType that represents the result
// of the given fir.pack_array operation. The original box
// is assumed to be present (though, it may represent an empty array).
static mlir::FailureOr<mlir::Value> genRepackedBox(fir::FirOpBuilder &builder,
mlir::Location loc,
fir::PackArrayOp packOp);
};
class UnpackArrayConversion
: public mlir::OpRewritePattern<fir::UnpackArrayOp> {
public:
using OpRewritePattern::OpRewritePattern;
mlir::LogicalResult
matchAndRewrite(fir::UnpackArrayOp op,
mlir::PatternRewriter &rewriter) const override;
};
} // anonymous namespace
// Return true iff the temporary for the given original boxed array
// can be allocated in stack memory.
// This function is used to synchronize the allocation/deallocation
// implied by fir.pack_array and fir.unpack_array, because
// the presence of the stack attribute does not automatically
// mean that the allocation is actually done in stack memory.
// For example, we always do the heap allocation for polymorphic
// types using the Fortran runtime.
// Adding the polymorphic mold to fir.alloca and then using
// the Fortran runtime to compute the allocation size could probably
// resolve this limitation.
static bool canAllocateTempOnStack(mlir::Value box) {
return !fir::isPolymorphicType(box.getType());
}
mlir::LogicalResult
PackArrayConversion::matchAndRewrite(fir::PackArrayOp op,
mlir::PatternRewriter &rewriter) const {
mlir::Location loc = op.getLoc();
fir::FirOpBuilder builder(rewriter, op.getOperation());
if (op.getMaxSize() || op.getMaxElementSize() || op.getMinStride())
TODO(loc, "fir.pack_array with constraints");
if (op.getHeuristics() != fir::PackArrayHeuristics::None)
TODO(loc, "fir.pack_array with heuristics");
mlir::Value box = op.getArray();
auto boxType = mlir::cast<fir::BaseBoxType>(box.getType());
// For now we have to always check if the box is present.
auto isPresent =
builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), box);
fir::IfOp ifOp = builder.create<fir::IfOp>(loc, boxType, isPresent,
/*withElseRegion=*/true);
builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
// The box is present.
auto newBox = genRepackedBox(builder, loc, op);
if (mlir::failed(newBox))
return newBox;
builder.create<fir::ResultOp>(loc, *newBox);
// The box is not present. Return original box.
builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
builder.create<fir::ResultOp>(loc, box);
rewriter.replaceOp(op, ifOp.getResult(0));
return mlir::success();
}
mlir::Value PackArrayConversion::allocateTempBuffer(
fir::FirOpBuilder &builder, mlir::Location loc, bool useStack,
mlir::Value origBox, llvm::ArrayRef<mlir::Value> extents,
llvm::ArrayRef<mlir::Value> typeParams) {
auto tempType = mlir::cast<fir::SequenceType>(
fir::extractSequenceType(origBox.getType()));
assert(tempType.getDimension() == extents.size() &&
"number of extents does not match the rank");
mlir::Value shape = builder.genShape(loc, extents);
auto [base, isHeapAllocation] = builder.createArrayTemp(
loc, tempType, shape, extents, typeParams,
fir::FirOpBuilder::genTempDeclareOp,
fir::isPolymorphicType(origBox.getType()) ? origBox : nullptr, useStack,
bufferName);
// Make sure canAllocateTempOnStack() can recognize when
// the temporary is actually allocated on the stack
// by createArrayTemp(). Otherwise, we may miss dynamic
// deallocation when lowering fir.unpack_array.
if (useStack && canAllocateTempOnStack(origBox))
assert(!isHeapAllocation && "temp must have been allocated on the stack");
if (isHeapAllocation)
if (auto baseType = mlir::dyn_cast<fir::ReferenceType>(base.getType()))
if (mlir::isa<fir::BaseBoxType>(baseType.getEleTy()))
return builder.create<fir::LoadOp>(loc, base);
mlir::Type ptrType = base.getType();
mlir::Type tempBoxType = fir::BoxType::get(mlir::isa<fir::HeapType>(ptrType)
? ptrType
: fir::unwrapRefType(ptrType));
mlir::Value newBox =
builder.createBox(loc, tempBoxType, base, shape, /*slice=*/nullptr,
typeParams, /*tdesc=*/nullptr);
return newBox;
}
mlir::FailureOr<mlir::Value>
PackArrayConversion::genRepackedBox(fir::FirOpBuilder &builder,
mlir::Location loc, fir::PackArrayOp op) {
mlir::OpBuilder::InsertionGuard guard(builder);
mlir::Value box = op.getArray();
llvm::SmallVector<mlir::Value> typeParams(op.getTypeparams().begin(),
op.getTypeparams().end());
auto boxType = mlir::cast<fir::BaseBoxType>(box.getType());
mlir::Type indexType = builder.getIndexType();
// If type parameters are not specified by fir.pack_array,
// figure out how many of them we need to read from the box.
unsigned numTypeParams = 0;
if (typeParams.size() == 0) {
if (auto recordType =
mlir::dyn_cast<fir::RecordType>(boxType.unwrapInnerType()))
if (recordType.getNumLenParams() != 0)
TODO(loc,
"allocating temporary for a parameterized derived type array");
if (auto charType =
mlir::dyn_cast<fir::CharacterType>(boxType.unwrapInnerType())) {
if (charType.hasDynamicLen()) {
// Read one length parameter from the box.
numTypeParams = 1;
} else {
// Place the constant length into typeParams.
mlir::Value length =
builder.createIntegerConstant(loc, indexType, charType.getLen());
typeParams.push_back(length);
}
}
}
// Create a temporary iff the original is not contiguous and is not empty.
auto isNotContiguous = builder.genNot(
loc, builder.create<fir::IsContiguousBoxOp>(loc, box, op.getInnermost()));
auto dataAddr =
builder.create<fir::BoxAddrOp>(loc, fir::boxMemRefType(boxType), box);
auto isNotEmpty =
builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), dataAddr);
auto doPack =
builder.create<mlir::arith::AndIOp>(loc, isNotContiguous, isNotEmpty);
fir::IfOp ifOp =
builder.create<fir::IfOp>(loc, boxType, doPack, /*withElseRegion=*/true);
// Return original box.
builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
builder.create<fir::ResultOp>(loc, box);
// Create a new box.
builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
// Get lower bounds and extents from the box.
llvm::SmallVector<mlir::Value, Fortran::common::maxRank> lbounds, extents;
fir::factory::genDimInfoFromBox(builder, loc, box, &lbounds, &extents,
/*strides=*/nullptr);
// Get the type parameters from the box, if needed.
llvm::SmallVector<mlir::Value> assumedTypeParams;
if (numTypeParams != 0) {
if (auto charType =
mlir::dyn_cast<fir::CharacterType>(boxType.unwrapInnerType()))
if (charType.hasDynamicLen()) {
fir::factory::CharacterExprHelper charHelper(builder, loc);
mlir::Value len = charHelper.readLengthFromBox(box, charType);
typeParams.push_back(builder.createConvert(loc, indexType, len));
}
if (numTypeParams != typeParams.size())
return emitError(loc) << "failed to compute the type parameters for "
<< op.getOperation() << '\n';
}
mlir::Value tempBox =
allocateTempBuffer(builder, loc, op.getStack(), box, extents, typeParams);
if (!op.getNoCopy())
fir::runtime::genShallowCopy(builder, loc, tempBox, box,
/*resultIsAllocated=*/true);
// Set the lower bounds to match the original box.
mlir::Value shift = builder.genShift(loc, lbounds);
tempBox = builder.create<fir::ReboxOp>(loc, boxType, tempBox, shift,
/*slice=*/nullptr);
builder.create<fir::ResultOp>(loc, tempBox);
return ifOp.getResult(0);
}
mlir::LogicalResult
UnpackArrayConversion::matchAndRewrite(fir::UnpackArrayOp op,
mlir::PatternRewriter &rewriter) const {
mlir::Location loc = op.getLoc();
fir::FirOpBuilder builder(rewriter, op.getOperation());
mlir::Type predicateType = builder.getI1Type();
mlir::Value tempBox = op.getTemp();
mlir::Value originalBox = op.getOriginal();
// For now we have to always check if the box is present.
auto isPresent =
builder.create<fir::IsPresentOp>(loc, predicateType, originalBox);
builder.genIfThen(loc, isPresent).genThen([&]() {
mlir::Type addrType =
fir::HeapType::get(fir::extractSequenceType(tempBox.getType()));
mlir::Value tempAddr =
builder.create<fir::BoxAddrOp>(loc, addrType, tempBox);
mlir::Value originalAddr =
builder.create<fir::BoxAddrOp>(loc, addrType, originalBox);
auto isNotSame = builder.genPtrCompare(loc, mlir::arith::CmpIPredicate::ne,
tempAddr, originalAddr);
builder.genIfThen(loc, isNotSame).genThen([&]() {
// Copy from temporary to the original.
if (!op.getNoCopy())
fir::runtime::genShallowCopy(builder, loc, originalBox, tempBox,
/*resultIsAllocated=*/true);
// Deallocate if the temporary was allocated in heap memory.
// Note that the stack attribute does not always mean
// that the allocation was actually done in stack memory.
// There are currently cases where we delegate the allocation
// to the runtime that uses heap memory, even when the stack
// attribute is set on fir.pack_array.
if (!op.getStack() || !canAllocateTempOnStack(originalBox))
builder.create<fir::FreeMemOp>(loc, tempAddr);
});
});
rewriter.eraseOp(op);
return mlir::success();
}
namespace {
class LowerRepackArraysPass
: public fir::impl::LowerRepackArraysPassBase<LowerRepackArraysPass> {
public:
using LowerRepackArraysPassBase<
LowerRepackArraysPass>::LowerRepackArraysPassBase;
void runOnOperation() override final {
auto *context = &getContext();
mlir::ModuleOp module = getOperation();
mlir::RewritePatternSet patterns(context);
patterns.insert<PackArrayConversion>(context);
patterns.insert<UnpackArrayConversion>(context);
mlir::GreedyRewriteConfig config;
config.enableRegionSimplification =
mlir::GreedySimplifyRegionLevel::Disabled;
(void)applyPatternsGreedily(module, std::move(patterns), config);
}
};
} // anonymous namespace


@@ -105,60 +105,27 @@ static mlir::Value getBufferizedExprMustFreeFlag(mlir::Value bufferizedExpr) {
static std::pair<hlfir::Entity, mlir::Value>
createArrayTemp(mlir::Location loc, fir::FirOpBuilder &builder,
mlir::Type exprType, mlir::Value shape,
mlir::ValueRange extents, mlir::ValueRange lenParams,
llvm::ArrayRef<mlir::Value> extents,
llvm::ArrayRef<mlir::Value> lenParams,
std::optional<hlfir::Entity> polymorphicMold) {
mlir::Type sequenceType = hlfir::getFortranElementOrSequenceType(exprType);
llvm::StringRef tmpName{".tmp.array"};
if (polymorphicMold) {
// Create *allocated* polymorphic temporary using the dynamic type
// of the mold and the provided shape/extents. The created temporary
// array will be written element per element, that is why it has to be
// allocated.
mlir::Type boxHeapType = fir::HeapType::get(sequenceType);
mlir::Value alloc = fir::factory::genNullBoxStorage(
builder, loc, fir::ClassType::get(boxHeapType));
mlir::Value isHeapAlloc = builder.createBool(loc, true);
fir::FortranVariableFlagsAttr declAttrs =
fir::FortranVariableFlagsAttr::get(
builder.getContext(), fir::FortranVariableFlagsEnum::allocatable);
auto sequenceType = mlir::cast<fir::SequenceType>(
hlfir::getFortranElementOrSequenceType(exprType));
auto genTempDeclareOp =
[](fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value memref,
llvm::StringRef name, mlir::Value shape,
llvm::ArrayRef<mlir::Value> typeParams,
fir::FortranVariableFlagsAttr attrs) -> mlir::Value {
auto declareOp =
builder.create<hlfir::DeclareOp>(loc, alloc, tmpName,
/*shape=*/nullptr, lenParams,
/*dummy_scope=*/nullptr, declAttrs);
builder.create<hlfir::DeclareOp>(loc, memref, name, shape, typeParams,
/*dummy_scope=*/nullptr, attrs);
return declareOp.getBase();
};
int rank = extents.size();
fir::runtime::genAllocatableApplyMold(builder, loc, alloc,
polymorphicMold->getFirBase(), rank);
if (!extents.empty()) {
mlir::Type idxTy = builder.getIndexType();
mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
unsigned dim = 0;
for (mlir::Value extent : extents) {
mlir::Value dimIndex = builder.createIntegerConstant(loc, idxTy, dim++);
fir::runtime::genAllocatableSetBounds(builder, loc, alloc, dimIndex,
one, extent);
}
}
if (!lenParams.empty()) {
// We should call AllocatableSetDerivedLength() here.
// TODO: does the mold provide the length parameters or
// the operation itself or should they be in sync?
TODO(loc, "polymorphic type with length parameters in HLFIR");
}
fir::runtime::genAllocatableAllocate(builder, loc, alloc);
return {hlfir::Entity{declareOp.getBase()}, isHeapAlloc};
}
mlir::Value allocmem = builder.createHeapTemporary(loc, sequenceType, tmpName,
extents, lenParams);
auto declareOp = builder.create<hlfir::DeclareOp>(
loc, allocmem, tmpName, shape, lenParams,
/*dummy_scope=*/nullptr, fir::FortranVariableFlagsAttr{});
mlir::Value trueVal = builder.createBool(loc, true);
return {hlfir::Entity{declareOp.getBase()}, trueVal};
auto [base, isHeapAlloc] = builder.createArrayTemp(
loc, sequenceType, shape, extents, lenParams, genTempDeclareOp,
polymorphicMold ? polymorphicMold->getFirBase() : nullptr);
return {hlfir::Entity{base}, builder.createBool(loc, isHeapAlloc)};
}
/// Copy \p source into a new temporary and package the temporary into a
@@ -786,9 +753,10 @@ struct ElementalOpConversion
if (adaptor.getMold())
mold = getBufferizedExprStorage(adaptor.getMold());
auto extents = hlfir::getIndexExtents(loc, builder, shape);
auto [temp, cleanup] =
createArrayTemp(loc, builder, elemental.getType(), shape, extents,
adaptor.getTypeparams(), mold);
llvm::SmallVector<mlir::Value> typeParams(adaptor.getTypeparams().begin(),
adaptor.getTypeparams().end());
auto [temp, cleanup] = createArrayTemp(loc, builder, elemental.getType(),
shape, extents, typeParams, mold);
// If the box load is needed, we'd better place it outside
// of the loop nest.
temp = derefPointersAndAllocatables(loc, builder, temp);


@@ -198,6 +198,7 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
pm.addPass(fir::createPolymorphicOpConversion());
pm.addPass(fir::createAssumedRankOpConversion());
pm.addPass(fir::createLowerRepackArraysPass());
// Expand FIR operations that may use SCF dialect for their
// implementation. This is a mandatory pass.
pm.addPass(fir::createSimplifyFIROperations(


@@ -47,6 +47,7 @@ end program
! CHECK-NEXT: PolymorphicOpConversion
! CHECK-NEXT: AssumedRankOpConversion
! CHECK-NEXT: LowerRepackArraysPass
! CHECK-NEXT: SimplifyFIROperations
! CHECK-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']


@@ -77,6 +77,7 @@ end program
! ALL-NEXT: PolymorphicOpConversion
! ALL-NEXT: AssumedRankOpConversion
! ALL-NEXT: LowerRepackArraysPass
! ALL-NEXT: SimplifyFIROperations
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']


@@ -101,6 +101,7 @@ end program
! ALL-NEXT: PolymorphicOpConversion
! ALL-NEXT: AssumedRankOpConversion
! ALL-NEXT: LowerRepackArraysPass
! ALL-NEXT: SimplifyFIROperations
! O2-NEXT: AddAliasTags


@@ -99,6 +99,7 @@ func.func @_QQmain() {
// PASSES-NEXT: PolymorphicOpConversion
// PASSES-NEXT: AssumedRankOpConversion
// PASSES-NEXT: LowerRepackArraysPass
// PASSES-NEXT: SimplifyFIROperations
// PASSES-NEXT: AddAliasTags


@@ -166,7 +166,6 @@ func.func @test_polymorphic(%arg0: !fir.class<!fir.type<_QMtypesTt>> {fir.bindc_
// CHECK: %[[VAL_10:.*]] = fir.shape %[[VAL_9]], %[[VAL_9]] : (index, index) -> !fir.shape<2>
// CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_8]](%[[VAL_10]]) : (!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>, !fir.shape<2>) -> !fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>
// CHECK: fir.store %[[VAL_11]] to %[[VAL_4]] : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
// CHECK: %[[VAL_12:.*]] = arith.constant true
// CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_4]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = ".tmp.array"} : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>, !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>)
// CHECK: %[[RANK:.*]] = arith.constant 2 : i32
// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_4]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
@@ -193,6 +192,7 @@ func.func @test_polymorphic(%arg0: !fir.class<!fir.type<_QMtypesTt>> {fir.bindc_
// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_4]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_31]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
// CHECK: %[[VAL_38:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_36]], %[[VAL_34]], %[[VAL_35]], %[[VAL_37]], %[[VAL_33]]) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_12:.*]] = arith.constant true
// CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
// CHECK: %[[VAL_40:.*]] = arith.constant 1 : index
// CHECK: fir.do_loop %[[VAL_41:.*]] = %[[VAL_40]] to %[[EX1]] step %[[VAL_40]] unordered {
@@ -250,7 +250,6 @@ func.func @test_polymorphic_expr(%arg0: !fir.class<!fir.type<_QMtypesTt>> {fir.b
// CHECK: %[[VAL_11:.*]] = fir.shape %[[VAL_10]], %[[VAL_10]] : (index, index) -> !fir.shape<2>
// CHECK: %[[VAL_12:.*]] = fir.embox %[[VAL_9]](%[[VAL_11]]) : (!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>, !fir.shape<2>) -> !fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>
// CHECK: fir.store %[[VAL_12]] to %[[VAL_5]] : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
// CHECK: %[[VAL_13:.*]] = arith.constant true
// CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_5]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = ".tmp.array"} : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>, !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>)
// CHECK: %[[VAL_15:.*]] = arith.constant 2 : i32
// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_5]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
@@ -277,6 +276,7 @@ func.func @test_polymorphic_expr(%arg0: !fir.class<!fir.type<_QMtypesTt>> {fir.b
// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_5]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_32]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
// CHECK: %[[VAL_39:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_37]], %[[VAL_35]], %[[VAL_36]], %[[VAL_38]], %[[VAL_34]]) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_13:.*]] = arith.constant true
// CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
// CHECK: %[[VAL_41:.*]] = arith.constant 1 : index
// CHECK: fir.do_loop %[[VAL_42:.*]] = %[[VAL_41]] to %[[VAL_3]] step %[[VAL_41]] unordered {
@@ -303,7 +303,6 @@ func.func @test_polymorphic_expr(%arg0: !fir.class<!fir.type<_QMtypesTt>> {fir.b
// CHECK: %[[VAL_60:.*]] = fir.shape %[[VAL_59]], %[[VAL_59]] : (index, index) -> !fir.shape<2>
// CHECK: %[[VAL_61:.*]] = fir.embox %[[VAL_58]](%[[VAL_60]]) : (!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>, !fir.shape<2>) -> !fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>
// CHECK: fir.store %[[VAL_61]] to %[[VAL_4]] : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
// CHECK: %[[VAL_62:.*]] = arith.constant true
// CHECK: %[[VAL_63:.*]]:2 = hlfir.declare %[[VAL_4]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = ".tmp.array"} : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>, !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>)
// CHECK: %[[VAL_64:.*]] = arith.constant 2 : i32
// CHECK: %[[VAL_65:.*]] = fir.convert %[[VAL_4]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
@@ -330,6 +329,7 @@ func.func @test_polymorphic_expr(%arg0: !fir.class<!fir.type<_QMtypesTt>> {fir.b
// CHECK: %[[VAL_86:.*]] = fir.convert %[[VAL_4]] : (!fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %[[VAL_87:.*]] = fir.convert %[[VAL_81]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
// CHECK: %[[VAL_88:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_86]], %[[VAL_84]], %[[VAL_85]], %[[VAL_87]], %[[VAL_83]]) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
// CHECK: %[[VAL_62:.*]] = arith.constant true
// CHECK: %[[VAL_89:.*]] = fir.load %[[VAL_63]]#0 : !fir.ref<!fir.class<!fir.heap<!fir.array<?x?x!fir.type<_QMtypesTt>>>>>
// CHECK: %[[VAL_90:.*]] = arith.constant 1 : index
// CHECK: fir.do_loop %[[VAL_91:.*]] = %[[VAL_90]] to %[[VAL_3]] step %[[VAL_90]] unordered {

File diff suppressed because it is too large.