The `extract_strided_metadata` op will be heavily used by the new buffer deallocation pass to get the base memref and pass it to the deallocation operation. This commit factors out some simplification logic of the pass into a canonicalization pattern.

Reviewed By: springerm

Differential Revision: https://reviews.llvm.org/D157255
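For illustration, this is the rewrite performed by the new canonicalization pattern (`SkipExtractMetadataOfAlloc`, defined below); the IR is adapted from the pattern's doc comment:

```mlir
// Before: the base memref passed to the dealloc is obtained via
// extract_strided_metadata.
%alloc = memref.alloc() : memref<2xi32>
%base_memref, %offset, %size, %stride = memref.extract_strided_metadata
  %alloc : memref<2xi32> -> memref<i32>, index, index, index
bufferization.dealloc (%base_memref : memref<i32>) if (%cond)

// After: the dealloc operand is the allocation itself.
%alloc = memref.alloc() : memref<2xi32>
bufferization.dealloc (%alloc : memref<2xi32>) if (%cond)
```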
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/Matchers.h"
#include <optional>

using namespace mlir;
using namespace mlir::bufferization;

//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//

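/// Try to cast the given ranked MemRef value to the given ranked MemRef type.
/// If a direct cast is not guaranteed to be valid at runtime, insert a
/// reallocation and copy instead.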
FailureOr<Value>
mlir::bufferization::castOrReallocMemRefValue(OpBuilder &b, Value value,
                                              MemRefType destType) {
  auto srcType = llvm::cast<MemRefType>(value.getType());

  // Element type, rank and memory space must match.
  if (srcType.getElementType() != destType.getElementType())
    return failure();
  if (srcType.getMemorySpace() != destType.getMemorySpace())
    return failure();
  if (srcType.getRank() != destType.getRank())
    return failure();

  // In case the affine maps are different, we may need to use a copy if we go
  // from dynamic to static offset or stride (the canonicalization cannot know
  // at this point that it is really cast compatible).
  auto isGuaranteedCastCompatible = [](MemRefType source, MemRefType target) {
    int64_t sourceOffset, targetOffset;
    SmallVector<int64_t, 4> sourceStrides, targetStrides;
    if (failed(getStridesAndOffset(source, sourceStrides, sourceOffset)) ||
        failed(getStridesAndOffset(target, targetStrides, targetOffset)))
      return false;
    auto dynamicToStatic = [](int64_t a, int64_t b) {
      return ShapedType::isDynamic(a) && !ShapedType::isDynamic(b);
    };
    if (dynamicToStatic(sourceOffset, targetOffset))
      return false;
    for (auto it : zip(sourceStrides, targetStrides))
      if (dynamicToStatic(std::get<0>(it), std::get<1>(it)))
        return false;
    return true;
  };

  // Note: If `areCastCompatible`, a cast is valid, but may fail at runtime. To
  // ensure that we only generate casts that always succeed at runtime, we
  // check a few extra conditions in `isGuaranteedCastCompatible`.
  if (memref::CastOp::areCastCompatible(srcType, destType) &&
      isGuaranteedCastCompatible(srcType, destType)) {
    Value casted = b.create<memref::CastOp>(value.getLoc(), destType, value);
    return casted;
  }

  auto loc = value.getLoc();
  SmallVector<Value, 4> dynamicOperands;
  for (int i = 0; i < destType.getRank(); ++i) {
    if (destType.getShape()[i] != ShapedType::kDynamic)
      continue;
    Value size = b.create<memref::DimOp>(loc, value, i);
    dynamicOperands.push_back(size);
  }
  // TODO: Use alloc/memcpy callback from BufferizationOptions if called via
  // BufferizableOpInterface impl of ToMemrefOp.
  Value copy = b.create<memref::AllocOp>(loc, destType, dynamicOperands);
  b.create<memref::CopyOp>(loc, value, copy);
  return copy;
}

/// Try to fold to_memref(to_tensor(x)). If x's type and the result type of the
/// to_memref op are different, a memref.cast is needed.
LogicalResult
mlir::bufferization::foldToMemrefToTensorPair(RewriterBase &rewriter,
                                              ToMemrefOp toMemref) {
  auto memrefToTensor = toMemref.getTensor().getDefiningOp<ToTensorOp>();
  if (!memrefToTensor)
    return failure();

  Type srcType = memrefToTensor.getMemref().getType();
  Type destType = toMemref.getType();

  // Directly rewrite if the type did not change.
  if (srcType == destType) {
    rewriter.replaceOp(toMemref, memrefToTensor.getMemref());
    return success();
  }

  auto rankedSrcType = llvm::dyn_cast<MemRefType>(srcType);
  auto rankedDestType = llvm::dyn_cast<MemRefType>(destType);
  auto unrankedSrcType = llvm::dyn_cast<UnrankedMemRefType>(srcType);

  // Ranked memref -> Ranked memref cast.
  if (rankedSrcType && rankedDestType) {
    FailureOr<Value> replacement = castOrReallocMemRefValue(
        rewriter, memrefToTensor.getMemref(), rankedDestType);
    if (failed(replacement))
      return failure();

    rewriter.replaceOp(toMemref, *replacement);
    return success();
  }

  // Unranked memref -> Ranked memref cast: May require a copy.
  // TODO: Not implemented at the moment.
  if (unrankedSrcType && rankedDestType)
    return failure();

  // Unranked memref -> unranked memref cast
  // Ranked memref -> unranked memref cast: No copy needed.
  assert(memref::CastOp::areCastCompatible(srcType, destType) &&
         "expected that types are cast compatible");
  rewriter.replaceOpWithNewOp<memref::CastOp>(toMemref, destType,
                                              memrefToTensor.getMemref());
  return success();
}

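/// Populate `dynamicDims` with the dynamic dimension sizes of the given shaped
/// value, using memref.dim for memrefs and tensor.dim for tensors.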
void mlir::bufferization::populateDynamicDimSizes(
    OpBuilder &b, Location loc, Value shapedValue,
    SmallVector<Value> &dynamicDims) {
  auto shapedType = llvm::cast<ShapedType>(shapedValue.getType());
  for (int64_t i = 0; i < shapedType.getRank(); ++i) {
    if (shapedType.isDynamicDim(i)) {
      if (llvm::isa<MemRefType>(shapedType)) {
        dynamicDims.push_back(b.create<memref::DimOp>(loc, shapedValue, i));
      } else {
        assert(llvm::isa<RankedTensorType>(shapedType) && "expected tensor");
        dynamicDims.push_back(b.create<tensor::DimOp>(loc, shapedValue, i));
      }
    }
  }
}

//===----------------------------------------------------------------------===//
// AllocTensorOp
//===----------------------------------------------------------------------===//

LogicalResult AllocTensorOp::bufferize(RewriterBase &rewriter,
                                       const BufferizationOptions &options) {
  OpBuilder::InsertionGuard g(rewriter);
  Location loc = getLoc();

  // Nothing to do for dead AllocTensorOps.
  if (getOperation()->getUses().empty()) {
    rewriter.eraseOp(getOperation());
    return success();
  }

  // Get "copy" buffer.
  Value copyBuffer;
  if (getCopy()) {
    FailureOr<Value> maybeCopyBuffer = getBuffer(rewriter, getCopy(), options);
    if (failed(maybeCopyBuffer))
      return failure();
    copyBuffer = *maybeCopyBuffer;
  }

  // Create memory allocation.
  auto allocType = bufferization::getBufferType(getResult(), options);
  if (failed(allocType))
    return failure();
  SmallVector<Value> dynamicDims = getDynamicSizes();
  if (getCopy()) {
    assert(dynamicDims.empty() && "expected either `copy` or `dynamicDims`");
    populateDynamicDimSizes(rewriter, loc, copyBuffer, dynamicDims);
  }
  FailureOr<Value> alloc = options.createAlloc(
      rewriter, loc, llvm::cast<MemRefType>(*allocType), dynamicDims);
  if (failed(alloc))
    return failure();

  // Create memory copy (if any).
  if (getCopy()) {
    if (failed(options.createMemCpy(rewriter, loc, copyBuffer, *alloc)))
      return failure();
  }

  // Should the buffer be deallocated?
  bool dealloc =
      shouldDeallocateOpResult(llvm::cast<OpResult>(getResult()), options);

  // Replace op.
  replaceOpWithBufferizedValues(rewriter, getOperation(), *alloc);

  // Create buffer deallocation (if requested).
  if (!dealloc)
    return success();

  rewriter.setInsertionPoint(rewriter.getInsertionBlock()->getTerminator());
  if (failed(options.createDealloc(rewriter, loc, *alloc)))
    return failure();
  return success();
}

bool AllocTensorOp::resultBufferizesToMemoryWrite(OpResult opResult,
                                                  const AnalysisState &state) {
  // AllocTensorOps do not write unless they have a `copy` value.
  return static_cast<bool>(getCopy());
}

bool AllocTensorOp::bufferizesToMemoryRead(OpOperand &opOperand,
                                           const AnalysisState &state) {
  assert(opOperand.getOperandNumber() == getNumOperands() - 1 &&
         "expected copy operand");
  return true;
}

bool AllocTensorOp::bufferizesToMemoryWrite(OpOperand &opOperand,
                                            const AnalysisState &state) {
  assert(opOperand.getOperandNumber() == getNumOperands() - 1 &&
         "expected copy operand");
  return false;
}

AliasingOpResultList
AllocTensorOp::getAliasingOpResults(OpOperand &opOperand,
                                    const AnalysisState &state) {
  // This is a new allocation. It does not alias with any other buffer.
  return {};
}

FailureOr<BaseMemRefType> AllocTensorOp::getBufferType(
    Value value, const BufferizationOptions &options,
    const DenseMap<Value, BaseMemRefType> &fixedTypes) {
  assert(value == getResult() && "invalid value");

  // Compute memory space of this allocation.
  Attribute memorySpace;
  if (getMemorySpace().has_value()) {
    memorySpace = *getMemorySpace();
  } else if (getCopy()) {
    auto copyBufferType =
        bufferization::getBufferType(getCopy(), options, fixedTypes);
    if (failed(copyBufferType))
      return failure();
    memorySpace = copyBufferType->getMemorySpace();
  } else if (options.defaultMemorySpace.has_value()) {
    memorySpace = *options.defaultMemorySpace;
  } else {
    return getOperation()->emitError("could not infer memory space");
  }

  return getMemRefTypeWithStaticIdentityLayout(getType(), memorySpace);
}

LogicalResult AllocTensorOp::verify() {
  if (getCopy() && !getDynamicSizes().empty())
    return emitError("dynamic sizes not needed when copying a tensor");
  if (!getCopy() && getType().getNumDynamicDims() !=
                        static_cast<int64_t>(getDynamicSizes().size()))
    return emitError("expected ")
           << getType().getNumDynamicDims() << " dynamic sizes";
  if (getCopy() && getCopy().getType() != getType())
    return emitError("expected that `copy` and return type match");

  // For sparse tensor allocation, we require that none of its
  // uses escapes the function boundary directly.
  if (sparse_tensor::getSparseTensorEncoding(getType())) {
    for (auto &use : getOperation()->getUses())
      if (isa<func::ReturnOp, func::CallOp, func::CallIndirectOp>(
              use.getOwner()))
        return emitError("sparse tensor allocation should not escape function");
  }

  return success();
}

void AllocTensorOp::build(OpBuilder &builder, OperationState &result,
                          RankedTensorType type, ValueRange dynamicSizes) {
  build(builder, result, type, dynamicSizes, /*copy=*/Value(),
        /*size_hint=*/Value(),
        /*memory_space=*/IntegerAttr());
}

void AllocTensorOp::build(OpBuilder &builder, OperationState &result,
                          RankedTensorType type, ValueRange dynamicSizes,
                          Value copy) {
  build(builder, result, type, dynamicSizes, copy, /*size_hint=*/Value(),
        /*memory_space=*/IntegerAttr());
}

void AllocTensorOp::build(OpBuilder &builder, OperationState &result,
                          TensorType type, ValueRange dynamicSizes, Value copy,
                          IntegerAttr memorySpace) {
  build(builder, result, type, dynamicSizes, copy, /*size_hint=*/Value(),
        memorySpace);
}

namespace {
/// Change the type of the result of a `bufferization.alloc_tensor` by making
/// the result type statically sized along dimensions that in the original
/// operation were defined as dynamic, but whose size was defined using a
/// `constant` op. For example:
///
///   %c5 = arith.constant 5: index
///   %0 = bufferization.alloc_tensor(%arg0, %c5) : tensor<?x?xf32>
///
///   to
///
///   %0 = bufferization.alloc_tensor(%arg0) : tensor<?x5xf32>
struct ReplaceStaticShapeDims : OpRewritePattern<AllocTensorOp> {
  using OpRewritePattern<AllocTensorOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(AllocTensorOp op,
                                PatternRewriter &rewriter) const override {
    if (op.getCopy())
      return failure();
    SmallVector<int64_t> newShape = llvm::to_vector(op.getType().getShape());
    SmallVector<Value> newDynamicSizes;
    unsigned int dynValCounter = 0;
    for (int64_t i = 0; i < op.getType().getRank(); ++i) {
      if (!op.isDynamicDim(i))
        continue;
      Value value = op.getDynamicSizes()[dynValCounter++];
      APInt intVal;
      if (matchPattern(value, m_ConstantInt(&intVal))) {
        newShape[i] = intVal.getSExtValue();
      } else {
        newDynamicSizes.push_back(value);
      }
    }
    RankedTensorType newType = RankedTensorType::get(
        newShape, op.getType().getElementType(), op.getType().getEncoding());
    if (newType == op.getType())
      return failure();
    auto newOp = rewriter.create<AllocTensorOp>(
        op.getLoc(), newType, newDynamicSizes, /*copy=*/Value());
    rewriter.replaceOpWithNewOp<tensor::CastOp>(op, op.getType(), newOp);
    return success();
  }
};

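/// Fold `tensor.dim` of a dynamic dimension of an `alloc_tensor` result to the
/// corresponding size value (either the dynamic size operand or a dim of the
/// `copy` operand).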
struct FoldDimOfAllocTensorOp : public OpRewritePattern<tensor::DimOp> {
  using OpRewritePattern<tensor::DimOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(tensor::DimOp dimOp,
                                PatternRewriter &rewriter) const override {
    std::optional<int64_t> maybeConstantIndex = dimOp.getConstantIndex();
    auto allocTensorOp = dimOp.getSource().getDefiningOp<AllocTensorOp>();
    if (!allocTensorOp || !maybeConstantIndex)
      return failure();
    if (!allocTensorOp.getType().isDynamicDim(*maybeConstantIndex))
      return failure();
    rewriter.replaceOp(
        dimOp, allocTensorOp.getDynamicSize(rewriter, *maybeConstantIndex));
    return success();
  }
};
} // namespace

void AllocTensorOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                MLIRContext *ctx) {
  results.add<FoldDimOfAllocTensorOp, ReplaceStaticShapeDims>(ctx);
}

LogicalResult AllocTensorOp::reifyResultShapes(
    OpBuilder &builder, ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
  auto shapes = llvm::to_vector<4>(
      llvm::map_range(llvm::seq<int64_t>(0, getType().getRank()),
                      [&](int64_t dim) -> OpFoldResult {
                        if (isDynamicDim(dim))
                          return getDynamicSize(builder, dim);
                        return builder.getIndexAttr(getStaticSize(dim));
                      }));
  reifiedReturnShapes.emplace_back(std::move(shapes));
  return success();
}

ParseResult AllocTensorOp::parse(OpAsmParser &parser, OperationState &result) {
  SmallVector<OpAsmParser::UnresolvedOperand> dynamicSizesOperands;
  if (parser.parseLParen() || parser.parseOperandList(dynamicSizesOperands) ||
      parser.parseRParen())
    return failure();
  ParseResult copyKeyword = parser.parseOptionalKeyword("copy");
  OpAsmParser::UnresolvedOperand copyOperand;
  if (copyKeyword.succeeded())
    if (parser.parseLParen() || parser.parseOperand(copyOperand) ||
        parser.parseRParen())
      return failure();
  ParseResult sizeHintKeyword = parser.parseOptionalKeyword("size_hint");
  OpAsmParser::UnresolvedOperand sizeHintOperand;
  if (sizeHintKeyword.succeeded())
    if (parser.parseEqual() || parser.parseOperand(sizeHintOperand))
      return failure();
  if (parser.parseOptionalAttrDict(result.attributes) || parser.parseColon())
    return failure();

  TensorType type;
  if (parser.parseCustomTypeWithFallback(type))
    return failure();
  result.addTypes(type);

  Type indexType = parser.getBuilder().getIndexType();
  if (parser.resolveOperands(dynamicSizesOperands, indexType, result.operands))
    return failure();
  if (copyKeyword.succeeded())
    if (parser.resolveOperand(copyOperand, type, result.operands))
      return failure();
  if (sizeHintKeyword.succeeded())
    if (parser.resolveOperand(sizeHintOperand, indexType, result.operands))
      return failure();
  result.addAttribute(AllocTensorOp::getOperandSegmentSizeAttr(),
                      parser.getBuilder().getDenseI32ArrayAttr(
                          {static_cast<int32_t>(dynamicSizesOperands.size()),
                           static_cast<int32_t>(copyKeyword.succeeded()),
                           static_cast<int32_t>(sizeHintKeyword.succeeded())}));
  return success();
}

void AllocTensorOp::print(OpAsmPrinter &p) {
  p << "(" << getDynamicSizes() << ")";
  if (getCopy())
    p << " copy(" << getCopy() << ")";
  if (getSizeHint())
    p << " size_hint=" << getSizeHint();
  p.printOptionalAttrDict((*this)->getAttrs(), /*elidedAttrs=*/{
                              AllocTensorOp::getOperandSegmentSizeAttr()});
  p << " : ";
  auto type = getResult().getType();
  if (auto validType = llvm::dyn_cast<::mlir::TensorType>(type))
    p.printStrippedAttrOrType(validType);
  else
    p << type;
}

Value AllocTensorOp::getDynamicSize(OpBuilder &b, unsigned idx) {
  assert(isDynamicDim(idx) && "expected dynamic dim");
  if (getCopy())
    return b.create<tensor::DimOp>(getLoc(), getCopy(), idx);
  return getOperand(getIndexOfDynamicSize(idx));
}

//===----------------------------------------------------------------------===//
// CopyTensorOp
//===----------------------------------------------------------------------===//

bool CopyTensorOp::bufferizesToMemoryRead(OpOperand &opOperand,
                                          const AnalysisState &state) {
  if (&opOperand == &getOperation()->getOpOperand(0) /*source*/)
    return true;
  return false;
}

bool CopyTensorOp::bufferizesToMemoryWrite(OpOperand &opOperand,
                                           const AnalysisState &state) {
  if (&opOperand == &getOperation()->getOpOperand(1) /*dest*/)
    return true;
  return false;
}

AliasingOpResultList
CopyTensorOp::getAliasingOpResults(OpOperand &opOperand,
                                   const AnalysisState &state) {
  if (&opOperand == &getOperation()->getOpOperand(1) /*dest*/)
    return {{getOperation()->getResult(0), BufferRelation::Equivalent}};
  return {};
}

LogicalResult CopyTensorOp::bufferize(RewriterBase &rewriter,
                                      const BufferizationOptions &options) {
  FailureOr<Value> buffer = getBuffer(rewriter, getDest(), options);
  if (failed(buffer))
    return failure();
  rewriter.create<memref::TensorStoreOp>(getLoc(), getSource(), *buffer);
  replaceOpWithBufferizedValues(rewriter, getOperation(), *buffer);
  return success();
}

LogicalResult CopyTensorOp::reifyResultShapes(
    OpBuilder &builder, ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
  reifiedReturnShapes.resize(1, SmallVector<OpFoldResult>(getType().getRank()));
  reifiedReturnShapes[0] = tensor::getMixedSizes(builder, getLoc(), getDest());
  return success();
}

//===----------------------------------------------------------------------===//
// CloneOp
//===----------------------------------------------------------------------===//

OpFoldResult CloneOp::fold(FoldAdaptor adaptor) {
  return succeeded(memref::foldMemRefCast(*this)) ? getResult() : Value();
}

namespace {

/// Merge the clone and its source (by converting the clone to a cast) when
/// possible.
struct SimplifyClones : public OpRewritePattern<CloneOp> {
  using OpRewritePattern<CloneOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(CloneOp cloneOp,
                                PatternRewriter &rewriter) const override {
    if (cloneOp.use_empty()) {
      rewriter.eraseOp(cloneOp);
      return success();
    }

    Value source = cloneOp.getInput();
    // Aims to find the dealloc op for the canonical source
    // which otherwise could prevent removal of unnecessary allocs.
    Value canonicalSource = source;
    while (auto iface = dyn_cast_or_null<ViewLikeOpInterface>(
               canonicalSource.getDefiningOp()))
      canonicalSource = iface.getViewSource();

    std::optional<Operation *> maybeCloneDeallocOp =
        memref::findDealloc(cloneOp.getOutput());
    // Skip if either of them has more than one deallocate operation.
    if (!maybeCloneDeallocOp.has_value())
      return failure();
    std::optional<Operation *> maybeSourceDeallocOp =
        memref::findDealloc(canonicalSource);
    if (!maybeSourceDeallocOp.has_value())
      return failure();
    Operation *cloneDeallocOp = *maybeCloneDeallocOp;
    Operation *sourceDeallocOp = *maybeSourceDeallocOp;

    // If both are deallocated in the same block, their in-block lifetimes
    // might not fully overlap, so we cannot decide which one to drop.
    if (cloneDeallocOp && sourceDeallocOp &&
        cloneDeallocOp->getBlock() == sourceDeallocOp->getBlock())
      return failure();

    Block *currentBlock = cloneOp->getBlock();
    Operation *redundantDealloc = nullptr;
    if (cloneDeallocOp && cloneDeallocOp->getBlock() == currentBlock) {
      redundantDealloc = cloneDeallocOp;
    } else if (sourceDeallocOp && sourceDeallocOp->getBlock() == currentBlock) {
      redundantDealloc = sourceDeallocOp;
    }

    if (!redundantDealloc)
      return failure();

    // Safety check that there are no other deallocations in between
    // cloneOp and redundantDealloc, as otherwise we might deallocate an alias
    // of source before the uses of the clone. With alias information, we could
    // restrict this to only fail if the dealloc's operand is an alias
    // of the source.
    for (Operation *pos = cloneOp->getNextNode(); pos != redundantDealloc;
         pos = pos->getNextNode()) {
      auto effectInterface = dyn_cast<MemoryEffectOpInterface>(pos);
      if (!effectInterface)
        continue;
      if (effectInterface.hasEffect<MemoryEffects::Free>())
        return failure();
    }

    rewriter.replaceOpWithNewOp<memref::CastOp>(cloneOp, cloneOp.getType(),
                                                source);
    rewriter.eraseOp(redundantDealloc);
    return success();
  }
};

} // namespace

void CloneOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                          MLIRContext *context) {
  results.add<SimplifyClones>(context);
}

//===----------------------------------------------------------------------===//
// DeallocTensorOp
//===----------------------------------------------------------------------===//

LogicalResult DeallocTensorOp::bufferize(RewriterBase &rewriter,
                                         const BufferizationOptions &options) {
  FailureOr<Value> buffer = getBuffer(rewriter, getTensor(), options);
  if (failed(buffer))
    return failure();
  if (failed(options.createDealloc(rewriter, getLoc(), *buffer)))
    return failure();
  rewriter.eraseOp(getOperation());
  return success();
}

//===----------------------------------------------------------------------===//
// ToTensorOp
//===----------------------------------------------------------------------===//

bool ToTensorOp::isWritable(Value value, const AnalysisState &state) {
  return getWritable();
}

OpFoldResult ToTensorOp::fold(FoldAdaptor) {
  if (auto toMemref = getMemref().getDefiningOp<ToMemrefOp>())
    // Approximate alias analysis by conservatively folding only when there
    // is no interleaved operation.
    if (toMemref->getBlock() == this->getOperation()->getBlock() &&
        toMemref->getNextNode() == this->getOperation())
      return toMemref.getTensor();
  return {};
}

namespace {
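/// Fold `tensor.dim` of a `to_tensor` op to a `memref.dim` on the source
/// memref.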
struct DimOfToTensorFolder : public OpRewritePattern<tensor::DimOp> {
  using OpRewritePattern<tensor::DimOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(tensor::DimOp dimOp,
                                PatternRewriter &rewriter) const override {
    auto memrefToTensorOp = dimOp.getSource().getDefiningOp<ToTensorOp>();
    if (!memrefToTensorOp)
      return failure();

    rewriter.replaceOpWithNewOp<memref::DimOp>(
        dimOp, memrefToTensorOp.getMemref(), dimOp.getIndex());
    return success();
  }
};
} // namespace

void ToTensorOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                             MLIRContext *context) {
  results.add<DimOfToTensorFolder>(context);
}

//===----------------------------------------------------------------------===//
// ToMemrefOp
//===----------------------------------------------------------------------===//

OpFoldResult ToMemrefOp::fold(FoldAdaptor) {
  if (auto memrefToTensor = getTensor().getDefiningOp<ToTensorOp>())
    if (memrefToTensor.getMemref().getType() == getType())
      return memrefToTensor.getMemref();
  return {};
}

namespace {

/// Replace tensor.cast + to_memref by to_memref + memref.cast.
struct ToMemrefOfCast : public OpRewritePattern<ToMemrefOp> {
  using OpRewritePattern<ToMemrefOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(ToMemrefOp toMemref,
                                PatternRewriter &rewriter) const final {
    auto tensorCastOperand =
        toMemref.getOperand().getDefiningOp<tensor::CastOp>();
    if (!tensorCastOperand)
      return failure();
    auto srcTensorType = llvm::dyn_cast<RankedTensorType>(
        tensorCastOperand.getOperand().getType());
    if (!srcTensorType)
      return failure();
    auto memrefType = MemRefType::get(srcTensorType.getShape(),
                                      srcTensorType.getElementType());
    Value memref = rewriter.create<ToMemrefOp>(toMemref.getLoc(), memrefType,
                                               tensorCastOperand.getOperand());
    rewriter.replaceOpWithNewOp<memref::CastOp>(toMemref, toMemref.getType(),
                                                memref);
    return success();
  }
};

/// Canonicalize bufferization.to_tensor + bufferization.to_memref. Insert a
/// cast if necessary.
struct ToMemrefToTensorFolding : public OpRewritePattern<ToMemrefOp> {
  using OpRewritePattern<ToMemrefOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(ToMemrefOp toMemref,
                                PatternRewriter &rewriter) const final {
    return foldToMemrefToTensorPair(rewriter, toMemref);
  }
};

/// Fold a load on a to_memref operation into a tensor.extract on the
/// corresponding tensor.
struct LoadOfToMemref : public OpRewritePattern<memref::LoadOp> {
  using OpRewritePattern<memref::LoadOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(memref::LoadOp load,
                                PatternRewriter &rewriter) const override {
    auto toMemref = load.getMemref().getDefiningOp<ToMemrefOp>();
    if (!toMemref)
      return failure();

    rewriter.replaceOpWithNewOp<tensor::ExtractOp>(load, toMemref.getTensor(),
                                                   load.getIndices());
    return success();
  }
};

/// Fold dim of a to_memref into the dim of the tensor.
struct DimOfCastOp : public OpRewritePattern<memref::DimOp> {
  using OpRewritePattern<memref::DimOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(memref::DimOp dimOp,
                                PatternRewriter &rewriter) const override {
    auto castOp = dimOp.getSource().getDefiningOp<ToMemrefOp>();
    if (!castOp)
      return failure();
    Value newSource = castOp.getOperand();
    rewriter.replaceOpWithNewOp<tensor::DimOp>(dimOp, newSource,
                                               dimOp.getIndex());
    return success();
  }
};

} // namespace

void ToMemrefOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                             MLIRContext *context) {
  results.add<DimOfCastOp, LoadOfToMemref, ToMemrefOfCast,
              ToMemrefToTensorFolding>(context);
}

LogicalResult ToMemrefOp::bufferize(RewriterBase &rewriter,
                                    const BufferizationOptions &options) {
  // Fold to_memref(to_tensor(x)) to x. Insert a cast if necessary.
  (void)foldToMemrefToTensorPair(rewriter, *this);
  // Note: The return value of `bufferize` indicates whether there was an error
  // or not. (And not whether the pattern matched or not.)
  return success();
}

std::optional<Operation *> CloneOp::buildDealloc(OpBuilder &builder,
                                                 Value alloc) {
  return builder.create<memref::DeallocOp>(alloc.getLoc(), alloc)
      .getOperation();
}

std::optional<Value> CloneOp::buildClone(OpBuilder &builder, Value alloc) {
  return builder.create<CloneOp>(alloc.getLoc(), alloc).getResult();
}

//===----------------------------------------------------------------------===//
// DeallocOp
//===----------------------------------------------------------------------===//

LogicalResult DeallocOp::inferReturnTypes(
    MLIRContext *context, std::optional<::mlir::Location> location,
    ValueRange operands, DictionaryAttr attributes, OpaqueProperties properties,
    RegionRange regions, SmallVectorImpl<Type> &inferredReturnTypes) {
  DeallocOpAdaptor adaptor(operands, attributes, properties, regions);
  inferredReturnTypes = SmallVector<Type>(adaptor.getRetained().size(),
                                          IntegerType::get(context, 1));
  return success();
}

LogicalResult DeallocOp::verify() {
  if (getMemrefs().size() != getConditions().size())
    return emitOpError(
        "must have the same number of conditions as memrefs to deallocate");
  return success();
}

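/// Replace the memref and condition operand lists of `deallocOp` in place.
/// Returns failure if nothing changed, so that callers can use the result
/// directly as the result of a rewrite pattern.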
static LogicalResult updateDeallocIfChanged(DeallocOp deallocOp,
                                            ValueRange memrefs,
                                            ValueRange conditions,
                                            PatternRewriter &rewriter) {
  if (deallocOp.getMemrefs() == memrefs &&
      deallocOp.getConditions() == conditions)
    return failure();

  rewriter.updateRootInPlace(deallocOp, [&]() {
    deallocOp.getMemrefsMutable().assign(memrefs);
    deallocOp.getConditionsMutable().assign(conditions);
  });
  return success();
}

namespace {

/// Remove duplicate values in the list of memrefs to be deallocated. The
/// conditions of two entries for the same memref might not cover the same set
/// of cases, so they have to be combined (by computing their disjunction).
/// Example:
/// ```mlir
/// bufferization.dealloc (%arg0, %arg0 : ...) if (%arg1, %arg2)
/// ```
/// is canonicalized to
/// ```mlir
/// %0 = arith.ori %arg1, %arg2 : i1
/// bufferization.dealloc (%arg0 : memref<2xi32>) if (%0)
/// ```
struct DeallocRemoveDuplicateDeallocMemrefs
    : public OpRewritePattern<DeallocOp> {
  using OpRewritePattern<DeallocOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(DeallocOp deallocOp,
                                PatternRewriter &rewriter) const override {
    // Unique memrefs to be deallocated.
    DenseMap<Value, unsigned> memrefToCondition;
    SmallVector<Value> newMemrefs, newConditions;
    for (auto [i, memref, cond] :
         llvm::enumerate(deallocOp.getMemrefs(), deallocOp.getConditions())) {
      if (memrefToCondition.count(memref)) {
        // If the dealloc conditions don't match, we need to make sure that the
        // dealloc happens on the union of cases.
        Value &newCond = newConditions[memrefToCondition[memref]];
        if (newCond != cond)
          newCond =
              rewriter.create<arith::OrIOp>(deallocOp.getLoc(), newCond, cond);
      } else {
        memrefToCondition.insert({memref, newConditions.size()});
        newMemrefs.push_back(memref);
        newConditions.push_back(cond);
      }
    }

    // Return failure if we don't change anything so that we don't run into an
    // infinite loop of pattern applications.
    return updateDeallocIfChanged(deallocOp, newMemrefs, newConditions,
                                  rewriter);
  }
};

/// Remove duplicate values in the list of retained memrefs. We need to make
/// sure the corresponding result condition value is replaced properly.
/// Example:
/// ```mlir
/// %0:2 = bufferization.dealloc retain (%arg3, %arg3 : ...)
/// ```
/// is canonicalized to
/// ```mlir
/// %0 = bufferization.dealloc retain (%arg3 : memref<2xi32>)
/// ```
struct DeallocRemoveDuplicateRetainedMemrefs
    : public OpRewritePattern<DeallocOp> {
  using OpRewritePattern<DeallocOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(DeallocOp deallocOp,
                                PatternRewriter &rewriter) const override {
    // Unique retained values
    DenseMap<Value, unsigned> seen;
    SmallVector<Value> newRetained;
    SmallVector<unsigned> resultReplacementIdx;
    unsigned i = 0;
    for (auto retained : deallocOp.getRetained()) {
      if (seen.count(retained)) {
        resultReplacementIdx.push_back(seen[retained]);
        continue;
      }

      seen[retained] = i;
      newRetained.push_back(retained);
      resultReplacementIdx.push_back(i++);
    }

    // Return failure if we don't change anything so that we don't run into an
    // infinite loop of pattern applications.
    if (newRetained.size() == deallocOp.getRetained().size())
      return failure();

    // We need to create a new op because the number of results is always the
    // same as the number of retained operands.
    auto newDeallocOp =
        rewriter.create<DeallocOp>(deallocOp.getLoc(), deallocOp.getMemrefs(),
                                   deallocOp.getConditions(), newRetained);
    SmallVector<Value> replacements(
        llvm::map_range(resultReplacementIdx, [&](unsigned idx) {
          return newDeallocOp.getUpdatedConditions()[idx];
        }));
    rewriter.replaceOp(deallocOp, replacements);
    return success();
  }
};

/// Remove memrefs to be deallocated that are also present in the retained list
/// since they will always alias and thus never actually be deallocated.
/// Example:
/// ```mlir
/// %0 = bufferization.dealloc (%arg0 : ...) if (%arg1) retain (%arg0 : ...)
/// ```
/// is canonicalized to
/// ```mlir
/// %0 = bufferization.dealloc retain (%arg0 : ...)
/// ```
struct DeallocRemoveDeallocMemrefsContainedInRetained
    : public OpRewritePattern<DeallocOp> {
  using OpRewritePattern<DeallocOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(DeallocOp deallocOp,
                                PatternRewriter &rewriter) const override {
    // Unique memrefs to be deallocated.
    DenseMap<Value, unsigned> retained;
    for (auto [i, ret] : llvm::enumerate(deallocOp.getRetained()))
      retained[ret] = i;

    // There must not be any duplicates in the retain list anymore because we
    // would miss updating one of the result values otherwise.
    if (retained.size() != deallocOp.getRetained().size())
      return failure();

    SmallVector<Value> newMemrefs, newConditions;
    for (auto [memref, cond] :
         llvm::zip(deallocOp.getMemrefs(), deallocOp.getConditions())) {
      if (retained.contains(memref)) {
        rewriter.setInsertionPointAfter(deallocOp);
        auto orOp = rewriter.create<arith::OrIOp>(
            deallocOp.getLoc(),
            deallocOp.getUpdatedConditions()[retained[memref]], cond);
        rewriter.replaceAllUsesExcept(
            deallocOp.getUpdatedConditions()[retained[memref]],
            orOp.getResult(), orOp);
        continue;
      }

      newMemrefs.push_back(memref);
      newConditions.push_back(cond);
    }

    // Return failure if we don't change anything so that we don't run into an
    // infinite loop of pattern applications.
    return updateDeallocIfChanged(deallocOp, newMemrefs, newConditions,
                                  rewriter);
  }
};

/// Erase deallocation operations where the variadic list of memrefs to
/// deallocate is empty. Example:
/// ```mlir
/// %0 = bufferization.dealloc retain (%arg0: memref<2xi32>)
/// ```
struct EraseEmptyDealloc : public OpRewritePattern<DeallocOp> {
  using OpRewritePattern<DeallocOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(DeallocOp deallocOp,
                                PatternRewriter &rewriter) const override {
    if (deallocOp.getMemrefs().empty()) {
      Value constFalse = rewriter.create<arith::ConstantOp>(
          deallocOp.getLoc(), rewriter.getBoolAttr(false));
      rewriter.replaceOp(
          deallocOp, SmallVector<Value>(deallocOp.getUpdatedConditions().size(),
                                        constFalse));
      return success();
    }
    return failure();
  }
};

/// Removes memrefs from the deallocation list if their associated condition is
/// always 'false'.
///
/// Example:
/// ```
/// bufferization.dealloc (%arg0, %arg1 : memref<2xi32>, memref<2xi32>)
///                           if (%arg2, %false)
/// ```
/// becomes
/// ```
/// bufferization.dealloc (%arg0 : memref<2xi32>) if (%arg2)
/// ```
struct EraseAlwaysFalseDealloc : public OpRewritePattern<DeallocOp> {
  using OpRewritePattern<DeallocOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(DeallocOp deallocOp,
                                PatternRewriter &rewriter) const override {
    SmallVector<Value> newMemrefs, newConditions;
    for (auto [memref, cond] :
         llvm::zip(deallocOp.getMemrefs(), deallocOp.getConditions())) {
      if (!matchPattern(cond, m_Zero())) {
        newMemrefs.push_back(memref);
        newConditions.push_back(cond);
      }
    }

    return updateDeallocIfChanged(deallocOp, newMemrefs, newConditions,
                                  rewriter);
  }
};

/// The `memref.extract_strided_metadata` op is often inserted to get the base
/// memref if the operand is not already guaranteed to be the result of a
/// memref allocation operation. This canonicalization pattern removes this
/// extraction operation if the operand is now produced by an allocation
/// operation (e.g., due to other canonicalizations simplifying the IR).
///
/// Example:
/// ```mlir
/// %alloc = memref.alloc() : memref<2xi32>
/// %base_memref, %offset, %size, %stride = memref.extract_strided_metadata
///   %alloc : memref<2xi32> -> memref<i32>, index, index, index
/// bufferization.dealloc (%base_memref : memref<i32>) if (%cond)
/// ```
/// is canonicalized to
/// ```mlir
/// %alloc = memref.alloc() : memref<2xi32>
/// bufferization.dealloc (%alloc : memref<2xi32>) if (%cond)
/// ```
struct SkipExtractMetadataOfAlloc : public OpRewritePattern<DeallocOp> {
  using OpRewritePattern<DeallocOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(DeallocOp deallocOp,
                                PatternRewriter &rewriter) const override {
    SmallVector<Value> newMemrefs(
        llvm::map_range(deallocOp.getMemrefs(), [&](Value memref) {
          auto extractStridedOp =
              memref.getDefiningOp<memref::ExtractStridedMetadataOp>();
          if (!extractStridedOp)
            return memref;
          Value allocMemref = extractStridedOp.getOperand();
          auto allocOp = allocMemref.getDefiningOp<MemoryEffectOpInterface>();
          if (!allocOp)
            return memref;
          if (allocOp.getEffectOnValue<MemoryEffects::Allocate>(allocMemref))
            return allocMemref;
          return memref;
        }));

    return updateDeallocIfChanged(deallocOp, newMemrefs,
                                  deallocOp.getConditions(), rewriter);
  }
};

} // anonymous namespace

void DeallocOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                            MLIRContext *context) {
  results.add<DeallocRemoveDuplicateDeallocMemrefs,
              DeallocRemoveDuplicateRetainedMemrefs,
              DeallocRemoveDeallocMemrefsContainedInRetained, EraseEmptyDealloc,
              EraseAlwaysFalseDealloc, SkipExtractMetadataOfAlloc>(context);
}

//===----------------------------------------------------------------------===//
// TableGen'd op method definitions
//===----------------------------------------------------------------------===//

#define GET_OP_CLASSES
#include "mlir/Dialect/Bufferization/IR/BufferizationOps.cpp.inc"