[mlir][bufferization] Move ModuleBufferization to bufferization dialect

* Move Module Bufferization to the bufferization dialect. The implementation is split into `OneShotModuleBufferize.cpp` and `FuncBufferizableOpInterfaceImpl.cpp`, so that the external model implementation can be easily moved to the func dialect in the future.
* Split and clean up test cases. A few test cases are still remaining in Linalg and will be updated separately.
* `linalg.inplaceable` is renamed to `bufferization.writable` to accurately reflect its current usage.
* Attributes and their verifiers are moved from the Linalg dialect to the Bufferization dialect.
* Expand documentation.
* Add a new One-Shot Bufferize flag, `bufferize-function-boundaries`, that enables bufferization across function boundaries (see the sketch below).
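
As an editorial illustration (not part of this change's test suite; `@fill_inplace` and its body are made up), the renamed attribute and the new flag would be used roughly like this:

```
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1"
func.func @fill_inplace(%t: tensor<?xf32> {bufferization.writable = true},
                        %f: f32) -> tensor<?xf32> {
  // Bufferizes in-place into the buffer of %t because the argument is writable.
  %0 = linalg.fill ins(%f : f32) outs(%t : tensor<?xf32>) -> tensor<?xf32>
  return %0 : tensor<?xf32>
}
```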

Differential Revision: https://reviews.llvm.org/D122229
Author: Matthias Springer
Date: 2022-04-22 18:08:44 +09:00
parent 3e1d2c352c
commit e07a7fd5c0
33 changed files with 1620 additions and 1487 deletions

View File

@@ -326,17 +326,12 @@ def BufferizableOpInterface : OpInterface<"BufferizableOpInterface"> {
&& !bufferizableOp.getAliasingOpResult(opOperand, state).empty();
}
-  // TODO: The following two attributes should belong to the tensor dialect.
-  // The corresponding verifier should also be in the tensor dialect.
+  // TODO: This attribute is deprecated. Use `bufferization.writable` or add
+  // a new attribute in a different dialect.
/// Attribute name used to mark region arguments that can be bufferized
/// in-place during one-shot bufferization.
constexpr const static ::llvm::StringLiteral
kInplaceableAttrName = "linalg.inplaceable";
/// Attribute name used to mark the bufferization layout for region
/// arguments during one-shot bufferization.
constexpr const static ::llvm::StringLiteral
kBufferLayoutAttrName = "linalg.buffer_layout";
kInplaceableAttrName = "linalg.inplaceable";
}];
}

View File

@@ -26,6 +26,19 @@ def Bufferization_Dialect : Dialect {
deallocation](/docs/BufferDeallocationInternals/).
}];
let dependentDialects = ["memref::MemRefDialect", "tensor::TensorDialect"];
let extraClassDeclaration = [{
/// An attribute that can override writability of buffers of tensor function
/// arguments during One-Shot Module Bufferize.
constexpr const static ::llvm::StringLiteral
kWritableAttrName = "bufferization.writable";
/// Attribute name used to mark the bufferization layout for region
/// arguments during One-Shot Module Bufferize.
constexpr const static ::llvm::StringLiteral
kBufferLayoutAttrName = "bufferization.buffer_layout";
}];
let hasOperationAttrVerify = 1;
}
#endif // BUFFERIZATION_BASE
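
For orientation, a small editorial sketch of how the two attribute names declared above are attached to function arguments (the function and the layout map are illustrative):

```
func.func @example(
    %A: tensor<?xf32> {bufferization.writable = false},
    %B: tensor<16xf32> {bufferization.buffer_layout = affine_map<(d0) -> (d0)>}) {
  return
}
```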

View File

@@ -0,0 +1,76 @@
//===- BufferizableOpInterfaceImpl.h - Impl. of BufferizableOpInterface ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_BUFFERIZATION_TRANSFORMS_FUNCBUFFERIZABLEOPINTERFACEIMPL_H
#define MLIR_BUFFERIZATION_TRANSFORMS_FUNCBUFFERIZABLEOPINTERFACEIMPL_H
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
namespace mlir {
class DialectRegistry;
namespace func {
class FuncOp;
} // namespace func
namespace bufferization {
namespace func_ext {
/// The state of analysis of a FuncOp.
enum class FuncOpAnalysisState { NotAnalyzed, InProgress, Analyzed };
using func::FuncOp;
/// Extra analysis state that is required for bufferization of function
/// boundaries.
struct FuncAnalysisState : public DialectAnalysisState {
// Note: Function arguments and/or function return values may disappear during
// bufferization. Functions and their CallOps are analyzed and bufferized
// separately. To ensure that a CallOp analysis/bufferization can access an
// already bufferized function's analysis results, we store bbArg/return value
// indices instead of BlockArguments/OpOperand pointers.
/// A set of block argument indices.
using BbArgIndexSet = DenseSet<int64_t>;
/// A mapping of indices to indices.
using IndexMapping = DenseMap<int64_t, int64_t>;
/// A mapping of indices to a list of indices.
using IndexToIndexListMapping = DenseMap<int64_t, SmallVector<int64_t>>;
/// A mapping of ReturnOp OpOperand indices to equivalent FuncOp BBArg
/// indices.
DenseMap<FuncOp, IndexMapping> equivalentFuncArgs;
/// A mapping of ReturnOp OpOperand indices to aliasing FuncOp BBArg indices.
DenseMap<FuncOp, IndexToIndexListMapping> aliasingFuncArgs;
/// A mapping of FuncOp BBArg indices to aliasing ReturnOp OpOperand indices.
DenseMap<FuncOp, IndexToIndexListMapping> aliasingReturnVals;
/// A set of all read BlockArguments of FuncOps.
DenseMap<FuncOp, BbArgIndexSet> readBbArgs;
/// A set of all written-to BlockArguments of FuncOps.
DenseMap<FuncOp, BbArgIndexSet> writtenBbArgs;
/// Keep track of which FuncOps are fully analyzed or currently being
/// analyzed.
DenseMap<FuncOp, FuncOpAnalysisState> analyzedFuncOps;
/// This function is called right before analyzing the given FuncOp. It
/// initializes the data structures for the FuncOp in this state object.
void startFunctionAnalysis(FuncOp funcOp);
};
void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry);
} // namespace func_ext
} // namespace bufferization
} // namespace mlir
#endif // MLIR_BUFFERIZATION_TRANSFORMS_FUNCBUFFERIZABLEOPINTERFACEIMPL_H
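
As an editorial aside on why indices rather than pointers are stored: for a hypothetical function like `@f` below, the analysis would record roughly `equivalentFuncArgs[@f] = {1 -> 0}` (return operand #1 is equivalent to bbArg #0), and that record stays valid even after the equivalent return value is dropped during bufferization.

```
func.func @f(%t: tensor<?xf32>, %i: index) -> (index, tensor<?xf32>) {
  // Return operand #1 is the unmodified bbArg #0.
  return %i, %t : index, tensor<?xf32>
}
```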

View File

@@ -0,0 +1,31 @@
//===- OneShotModuleBufferize.h - Bufferization across Func. Boundaries ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_ONESHOTMODULEBUFFERIZE_H
#define MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_ONESHOTMODULEBUFFERIZE_H
namespace mlir {
struct LogicalResult;
class ModuleOp;
namespace bufferization {
struct OneShotBufferizationOptions;
/// Run One-Shot Module Bufferization on the given module. Performs a simple
/// function call analysis to determine which function arguments are
/// inplaceable. Then analyzes and bufferizes FuncOps one-by-one with One-Shot
/// Bufferize.
LogicalResult
runOneShotModuleBufferize(ModuleOp moduleOp,
bufferization::OneShotBufferizationOptions options);
} // namespace bufferization
} // namespace mlir
#endif // MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_ONESHOTMODULEBUFFERIZE_H

View File

@@ -200,6 +200,34 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
prints analysis results and explains why an OpOperand was decided to
bufferize out-of-place. This is useful for understanding why One-Shot
Bufferize chose to insert a certain buffer copy.
`bufferize-function-boundaries` is an experimental flag for bufferizing
`FuncOp`, `ReturnOp` and `CallOp`. This feature is still under development
and supports only simple cases at the moment. In particular:
* Recursive or circular function call graphs are not supported.
* If a newly allocated buffer is returned from a function (with
`allow-return-allocs`), the buffer will never be deallocated and will leak.
Such IR needs special handling, e.g., allocation hoisting or reference
counting.
* External functions (without bodies) that return a tensor are not
supported.
* Functions with multiple blocks or multiple ReturnOps are not supported.
One-Shot Bufferize implements the following contract around function calls:
The buffers of function arguments are always writable (unless annotated with
`bufferization.writable = false`). A buffer copy may be inserted at the call
site where necessary. Alias sets and equivalence info are propagated through
function calls. Whenever a function is bufferized, all functions that it
calls have already been analyzed and bufferized, so exact alias and
equivalence information is available. This is why recursive function calls
are not yet supported.
One-Shot Bufferize gathers additional information during the analysis phase
when function boundary bufferization is activated, e.g., whether a function
argument is read/written and which returned values are aliasing/equivalent.
For debugging purposes, such information can be printed with
`test-analysis-only`.
}];
let options = [
Option<"allowReturnAllocs", "allow-return-allocs", "bool",
@@ -211,6 +239,9 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
Option<"analysisFuzzerSeed", "analysis-fuzzer-seed", "unsigned",
/*default=*/"0",
"Test only: Analyze ops in random order with a given seed (fuzzer)">,
Option<"bufferizeFunctionBoundaries", "bufferize-function-boundaries",
"bool", /*default=*/"0",
"Bufferize function boundaries (experimental).">,
Option<"createDeallocs", "create-deallocs", "bool", /*default=*/"true",
"Specify if buffers should be deallocated. For compatibility with "
"core bufferization passes.">,

View File

@@ -1,4 +1,3 @@
add_subdirectory(ComprehensiveBufferize)
add_subdirectory(IR)
set(LLVM_TARGET_DEFINITIONS Passes.td)

View File

@@ -1,2 +0,0 @@
# no targets defined here

View File

@@ -1,43 +0,0 @@
//===- ModuleBufferization.h - Bufferization across Func. Boundaries ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_MODULEBUFFERIZATION_H
#define MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_MODULEBUFFERIZATION_H
#include <memory>
namespace mlir {
class DialectRegistry;
struct LogicalResult;
class ModuleOp;
namespace bufferization {
struct OneShotBufferizationOptions;
} // namespace bufferization
namespace linalg {
namespace comprehensive_bufferize {
/// Run Module Bufferization on the given module. Performs a simple function
/// call analysis to determine which function arguments are inplaceable. Then
/// analyzes and bufferizes FuncOps one-by-one with One-Shot Bufferize.
LogicalResult
runModuleBufferize(ModuleOp moduleOp,
bufferization::OneShotBufferizationOptions options);
namespace std_ext {
void registerModuleBufferizationExternalModels(DialectRegistry &registry);
} // namespace std_ext
} // namespace comprehensive_bufferize
} // namespace linalg
} // namespace mlir
#endif // MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_MODULEBUFFERIZATION_H

View File

@@ -22,6 +22,7 @@
#include "mlir/Dialect/ArmSVE/ArmSVEDialect.h"
#include "mlir/Dialect/Async/IR/Async.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Complex/IR/Complex.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
#include "mlir/Dialect/DLTI/DLTI.h"
@@ -46,6 +47,7 @@
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
#include "mlir/Dialect/Shape/IR/Shape.h"
#include "mlir/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h"
@@ -100,8 +102,11 @@ inline void registerAllDialects(DialectRegistry &registry) {
x86vector::X86VectorDialect>();
// clang-format on
arith::registerBufferizableOpInterfaceExternalModels(registry);
bufferization::func_ext::registerBufferizableOpInterfaceExternalModels(
registry);
linalg::registerBufferizableOpInterfaceExternalModels(registry);
scf::registerBufferizableOpInterfaceExternalModels(registry);
shape::registerBufferizableOpInterfaceExternalModels(registry);
tensor::registerBufferizableOpInterfaceExternalModels(registry);
tensor::registerInferTypeOpInterfaceExternalModels(registry);
tensor::registerTilingOpInterfaceExternalModels(registry);

View File

@@ -33,11 +33,6 @@ namespace bufferization {
using namespace mlir;
using namespace bufferization;
/// Attribute name used to mark the bufferization layout for region
/// arguments during linalg comprehensive bufferization.
constexpr const ::llvm::StringLiteral
bufferization::BufferizableOpInterface::kBufferLayoutAttrName;
/// Attribute name used to mark region arguments that can be bufferized
/// in-place during linalg comprehensive bufferization.
constexpr const ::llvm::StringLiteral

View File

@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/IR/FunctionInterfaces.h"
#include "mlir/Transforms/InliningUtils.h"
using namespace mlir;
@@ -14,6 +15,15 @@ using namespace mlir::bufferization;
#include "mlir/Dialect/Bufferization/IR/BufferizationOpsDialect.cpp.inc"
/// Attribute name used to mark function arguments whose buffers can be written
/// to during One-Shot Module Bufferize.
constexpr const ::llvm::StringLiteral BufferizationDialect::kWritableAttrName;
/// Attribute name used to mark the bufferization layout for region arguments
/// during One-Shot Module Bufferize.
constexpr const ::llvm::StringLiteral
BufferizationDialect::kBufferLayoutAttrName;
//===----------------------------------------------------------------------===//
// Bufferization Dialect Interfaces
//===----------------------------------------------------------------------===//
@@ -41,3 +51,33 @@ void mlir::bufferization::BufferizationDialect::initialize() {
>();
addInterfaces<BufferizationInlinerInterface>();
}
LogicalResult
BufferizationDialect::verifyOperationAttribute(Operation *op,
NamedAttribute attr) {
using bufferization::BufferizableOpInterface;
if (attr.getName() == kWritableAttrName) {
if (!attr.getValue().isa<BoolAttr>()) {
return op->emitError() << "'" << kWritableAttrName
<< "' is expected to be a boolean attribute";
}
if (!isa<FunctionOpInterface>(op))
return op->emitError() << "expected " << attr.getName()
<< " to be used on function-like operations";
return success();
}
if (attr.getName() == kBufferLayoutAttrName) {
if (!attr.getValue().isa<AffineMapAttr>()) {
return op->emitError() << "'" << kBufferLayoutAttrName
<< "' is expected to be a affine map attribute";
}
if (!isa<FunctionOpInterface>(op))
return op->emitError() << "expected " << attr.getName()
<< " to be used on function-like operations";
return success();
}
return op->emitError() << "attribute '" << attr.getName()
<< "' not supported by the bufferization dialect";
}

View File

@@ -12,6 +12,7 @@
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Operation.h"
@@ -178,8 +179,10 @@ struct OneShotBufferizePass
BufferizationOptions::OpFilterEntry::FilterFn filterFn =
[&](Operation *op) {
// Disallow non-func dialect ops. I.e., no ops related to function
-            // calls.
-            if (isa<func::FuncDialect>(op->getDialect()))
+            // calls. (Unless explicitly activated.)
+            bool isFuncBoundaryOp =
+                isa_and_nonnull<func::FuncDialect>(op->getDialect());
+            if (!this->bufferizeFunctionBoundaries && isFuncBoundaryOp)
return false;
// Filter may be specified via options.
if (this->dialectFilter.hasValue())
@@ -195,9 +198,16 @@ struct OneShotBufferizePass
}
ModuleOp moduleOp = getOperation();
-    if (failed(runOneShotBufferize(moduleOp, opt))) {
-      signalPassFailure();
-      return;
+    if (bufferizeFunctionBoundaries) {
+      if (failed(runOneShotModuleBufferize(moduleOp, opt))) {
+        signalPassFailure();
+        return;
+      }
+    } else {
+      if (failed(runOneShotBufferize(moduleOp, opt))) {
+        signalPassFailure();
+        return;
+      }
}
if (opt.testAnalysisOnly)

View File

@@ -4,7 +4,9 @@ add_mlir_dialect_library(MLIRBufferizationTransforms
BufferOptimizations.cpp
BufferResultsToOutParams.cpp
BufferUtils.cpp
FuncBufferizableOpInterfaceImpl.cpp
OneShotAnalysis.cpp
OneShotModuleBufferize.cpp
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Bufferization

View File

@@ -0,0 +1,542 @@
//===- BufferizableOpInterfaceImpl.cpp - Impl. of BufferizableOpInterface -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/Operation.h"
namespace mlir {
namespace bufferization {
namespace func_ext {
void FuncAnalysisState::startFunctionAnalysis(FuncOp funcOp) {
analyzedFuncOps[funcOp] = FuncOpAnalysisState::InProgress;
auto createdEquiv = equivalentFuncArgs.try_emplace(funcOp, IndexMapping());
auto createdAliasingOperands =
aliasingFuncArgs.try_emplace(funcOp, IndexToIndexListMapping());
auto createdAliasingResults =
aliasingReturnVals.try_emplace(funcOp, IndexToIndexListMapping());
auto createdRead = readBbArgs.try_emplace(funcOp, BbArgIndexSet());
auto createdWritten = writtenBbArgs.try_emplace(funcOp, BbArgIndexSet());
(void)createdEquiv;
(void)createdAliasingOperands;
(void)createdAliasingResults;
(void)createdRead;
(void)createdWritten;
#ifndef NDEBUG
assert(createdEquiv.second && "equivalence info exists already");
assert(createdAliasingOperands.second && "aliasing info exists already");
assert(createdAliasingResults.second && "aliasing info exists already");
assert(createdRead.second && "bbarg access info exists already");
assert(createdWritten.second && "bbarg access info exists already");
#endif // NDEBUG
}
/// Return the unique ReturnOp that terminates `funcOp`.
/// Return nullptr if there is no such unique ReturnOp.
static func::ReturnOp getAssumedUniqueReturnOp(FuncOp funcOp) {
func::ReturnOp returnOp;
for (Block &b : funcOp.getBody()) {
if (auto candidateOp = dyn_cast<func::ReturnOp>(b.getTerminator())) {
if (returnOp)
return nullptr;
returnOp = candidateOp;
}
}
return returnOp;
}
/// Return the index-th bufferized function argument type. This assumes that the
/// specified argument is a tensor. If the tensor is ranked, a layout map may be
/// specified by the user. If no layout map is specified, a fully dynamic map is
/// used.
static BaseMemRefType
getBufferizedFunctionArgType(FuncOp funcOp, int64_t index,
const BufferizationOptions &options) {
auto tensorType =
funcOp.getFunctionType().getInput(index).dyn_cast<TensorType>();
assert(tensorType && "expected TensorType");
BaseMemRefType memrefType = getMemRefType(tensorType, options);
auto layoutAttr = funcOp.getArgAttrOfType<AffineMapAttr>(
index, BufferizationDialect::kBufferLayoutAttrName);
if (!layoutAttr)
return memrefType;
auto rankedMemrefType = memrefType.dyn_cast<MemRefType>();
assert(rankedMemrefType && "buffer layout not supported on unranked tensors");
return MemRefType::get(
rankedMemrefType.getShape(), rankedMemrefType.getElementType(),
layoutAttr.getValue(), rankedMemrefType.getMemorySpaceAsInt());
}
/// Return the FuncOp called by `callOp`.
static FuncOp getCalledFunction(CallOpInterface callOp) {
SymbolRefAttr sym = callOp.getCallableForCallee().dyn_cast<SymbolRefAttr>();
if (!sym)
return nullptr;
return dyn_cast_or_null<FuncOp>(
SymbolTable::lookupNearestSymbolFrom(callOp, sym));
}
/// Get FuncAnalysisState.
static const FuncAnalysisState &
getFuncAnalysisState(const AnalysisState &state) {
Optional<const FuncAnalysisState *> maybeState =
state.getDialectState<FuncAnalysisState>(
func::FuncDialect::getDialectNamespace());
assert(maybeState.hasValue() && "FuncAnalysisState does not exist");
return **maybeState;
}
/// Return the state (phase) of analysis of the FuncOp.
static FuncOpAnalysisState getFuncOpAnalysisState(const AnalysisState &state,
FuncOp funcOp) {
const FuncAnalysisState &funcState = getFuncAnalysisState(state);
auto it = funcState.analyzedFuncOps.find(funcOp);
if (it == funcState.analyzedFuncOps.end())
return FuncOpAnalysisState::NotAnalyzed;
return it->second;
}
/// Return the index of the bbArg in the given FuncOp that is equivalent to the
/// specified return value (if any).
static Optional<int64_t> getEquivalentFuncArgIdx(FuncOp funcOp,
const FuncAnalysisState &state,
int64_t returnValIdx) {
auto funcOpIt = state.equivalentFuncArgs.find(funcOp);
if (funcOpIt == state.equivalentFuncArgs.end())
// No equivalence info stored for funcOp.
return None;
auto retValIt = funcOpIt->getSecond().find(returnValIdx);
if (retValIt == funcOpIt->getSecond().end())
// Return value has no equivalent bbArg.
return None;
return retValIt->getSecond();
}
struct CallOpInterface
: public BufferizableOpInterface::ExternalModel<CallOpInterface,
func::CallOp> {
bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {
func::CallOp callOp = cast<func::CallOp>(op);
FuncOp funcOp = getCalledFunction(callOp);
assert(funcOp && "expected CallOp to a FuncOp");
const FuncAnalysisState &funcState = getFuncAnalysisState(state);
if (getFuncOpAnalysisState(state, funcOp) != FuncOpAnalysisState::Analyzed)
// FuncOp not analyzed yet. Assume that OpOperand is read.
return true;
return funcState.readBbArgs.lookup(funcOp).contains(
opOperand.getOperandNumber());
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {
func::CallOp callOp = cast<func::CallOp>(op);
FuncOp funcOp = getCalledFunction(callOp);
assert(funcOp && "expected CallOp to a FuncOp");
const FuncAnalysisState &funcState = getFuncAnalysisState(state);
if (getFuncOpAnalysisState(state, funcOp) != FuncOpAnalysisState::Analyzed)
// FuncOp not analyzed yet. Assume that OpOperand is written.
return true;
return funcState.writtenBbArgs.lookup(funcOp).contains(
opOperand.getOperandNumber());
}
SmallVector<OpResult> getAliasingOpResult(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {
func::CallOp callOp = cast<func::CallOp>(op);
FuncOp funcOp = getCalledFunction(callOp);
assert(funcOp && "expected CallOp to a FuncOp");
const FuncAnalysisState &funcState = getFuncAnalysisState(state);
if (getFuncOpAnalysisState(state, funcOp) !=
FuncOpAnalysisState::Analyzed) {
// FuncOp not analyzed yet. Any OpResult may be aliasing.
SmallVector<OpResult> result;
for (OpResult opResult : op->getOpResults())
if (opResult.getType().isa<TensorType>())
result.push_back(opResult);
return result;
}
// Get aliasing results from state.
auto aliasingReturnVals =
funcState.aliasingReturnVals.lookup(funcOp).lookup(
opOperand.getOperandNumber());
SmallVector<OpResult> result;
for (int64_t resultIdx : aliasingReturnVals)
result.push_back(callOp->getOpResult(resultIdx));
return result;
}
SmallVector<OpOperand *>
getAliasingOpOperand(Operation *op, OpResult opResult,
const AnalysisState &state) const {
func::CallOp callOp = cast<func::CallOp>(op);
FuncOp funcOp = getCalledFunction(callOp);
assert(funcOp && "expected CallOp to a FuncOp");
const FuncAnalysisState &funcState = getFuncAnalysisState(state);
if (getFuncOpAnalysisState(state, funcOp) !=
FuncOpAnalysisState::Analyzed) {
// FuncOp not analyzed yet. Any OpOperand may be aliasing.
SmallVector<OpOperand *> result;
for (OpOperand &opOperand : op->getOpOperands())
if (opOperand.get().getType().isa<TensorType>())
result.push_back(&opOperand);
return result;
}
// Get aliasing bbArgs from state.
auto aliasingFuncArgs = funcState.aliasingFuncArgs.lookup(funcOp).lookup(
opResult.getResultNumber());
SmallVector<OpOperand *> result;
for (int64_t bbArgIdx : aliasingFuncArgs)
result.push_back(&callOp->getOpOperand(bbArgIdx));
return result;
}
BufferRelation bufferRelation(Operation *op, OpResult opResult,
const AnalysisState &state) const {
return BufferRelation::Equivalent;
}
/// All function arguments are writable. It is the responsibility of the
/// CallOp to insert buffer copies where necessary.
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
BufferizationState &state) const {
func::CallOp callOp = cast<func::CallOp>(op);
unsigned numResults = callOp.getNumResults();
unsigned numOperands = callOp->getNumOperands();
FuncOp funcOp = getCalledFunction(callOp);
assert(funcOp && "expected CallOp to a FuncOp");
FunctionType funcType = funcOp.getFunctionType();
const FuncAnalysisState &funcState =
getFuncAnalysisState(state.getAnalysisState());
const OneShotBufferizationOptions &options =
static_cast<const OneShotBufferizationOptions &>(state.getOptions());
// Result types of the bufferized CallOp.
SmallVector<Type> resultTypes;
// Replacement values for the existing CallOp. These are usually the results
// of the bufferized CallOp, unless a tensor result folds onto an operand.
SmallVector<Value> replacementValues(numResults, Value());
// For non-tensor results: A mapping from return val indices of the old
// CallOp to return val indices of the bufferized CallOp.
SmallVector<Optional<unsigned>> retValMapping(numResults, None);
// Operands of the bufferized CallOp.
SmallVector<Value> newOperands(numOperands, Value());
// Based on previously gathered equivalence information, we know if a
// tensor result folds onto an operand. These are the only tensor value
// results that are supported at the moment.
//
// For tensor return values that do not fold onto an operand, additional
// work is needed (TODO) to either:
// * hoist a result into an inplaceable operand or
// * devise a better representation to truly return a buffer.
//
// Note: If a function has no body, no equivalence information is
// available. Consequently, a tensor return value cannot be proven to fold
// onto a FuncOp bbArg, so calls to such functions are not bufferizable at
// the moment.
// 1. Compute the result types of the new CallOp. Tensor results that are
// equivalent to a FuncOp bbArg are no longer returned.
for (const auto &it : llvm::enumerate(callOp.getResultTypes())) {
unsigned returnValIdx = it.index();
Type returnType = it.value();
if (!returnType.isa<TensorType>()) {
// Non-tensor values are returned.
retValMapping[returnValIdx] = resultTypes.size();
resultTypes.push_back(returnType);
continue;
}
if (Optional<int64_t> bbArgIdx =
getEquivalentFuncArgIdx(funcOp, funcState, returnValIdx)) {
// Return operands that are equivalent to some bbArg are not
// returned.
FailureOr<Value> bufferOrFailure =
state.getBuffer(rewriter, callOp->getOpOperand(*bbArgIdx));
if (failed(bufferOrFailure))
return failure();
replacementValues[returnValIdx] = *bufferOrFailure;
newOperands[*bbArgIdx] = *bufferOrFailure;
continue;
}
if (!options.allowReturnAllocs)
return callOp->emitError(
"call to FuncOp that returns non-equivalent tensors not supported");
// Returning a memref. This memref is not equivalent to any bbArg. It is
// likely a newly allocated buffer. We may want to hoist such allocations
// to the call site in the future.
retValMapping[returnValIdx] = resultTypes.size();
resultTypes.push_back(funcType.getResult(resultTypes.size()));
}
// 2. Rewrite tensor operands as memrefs based on `bufferizedFuncType`.
for (OpOperand &opOperand : callOp->getOpOperands()) {
unsigned idx = opOperand.getOperandNumber();
Value tensorOperand = opOperand.get();
// Non-tensor operands are just copied.
if (!tensorOperand.getType().isa<TensorType>()) {
newOperands[idx] = tensorOperand;
continue;
}
// Retrieve buffers for tensor operands. Tensor operand buffers, whose
// corresponding FuncOp bbArgs are equivalent to a returned tensor, were
// already stored in `newOperands` during Step 1.
Value buffer = newOperands[idx];
if (!buffer) {
FailureOr<Value> bufferOrFailure = state.getBuffer(rewriter, opOperand);
if (failed(bufferOrFailure))
return failure();
buffer = *bufferOrFailure;
}
// Caller / callee type mismatch is handled with a CastOp.
auto memRefType = funcType.getInput(idx);
// Since we don't yet have a clear layout story, to_memref may
// conservatively turn tensors into more dynamic memrefs than necessary.
// If the callee's memref type does not match, introduce an extra memref.cast
// that will either canonicalize away or fail compilation until we can do
// something better.
if (buffer.getType() != memRefType) {
assert(
memref::CastOp::areCastCompatible(buffer.getType(), memRefType) &&
"CallOp::bufferize: cast incompatible");
Value castBuffer = rewriter.create<memref::CastOp>(callOp.getLoc(),
memRefType, buffer);
buffer = castBuffer;
}
newOperands[idx] = buffer;
}
// 3. Create the new CallOp.
Operation *newCallOp = rewriter.create<func::CallOp>(
callOp.getLoc(), funcOp.getSymName(), resultTypes, newOperands);
newCallOp->setAttrs(callOp->getAttrs());
// Get replacement values for non-tensor / non-equivalent results.
for (unsigned i = 0; i < replacementValues.size(); ++i) {
if (replacementValues[i])
continue;
replacementValues[i] = newCallOp->getResult(*retValMapping[i]);
}
// 4. Replace the old op with the new op.
replaceOpWithBufferizedValues(rewriter, callOp, replacementValues);
return success();
}
};
struct ReturnOpInterface
: public BufferizableOpInterface::ExternalModel<ReturnOpInterface,
func::ReturnOp> {
bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {
return true;
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {
return false;
}
SmallVector<OpResult> getAliasingOpResult(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {
return {};
}
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
BufferizationState &state) const {
#ifndef NDEBUG
auto returnOp = cast<func::ReturnOp>(op);
assert(isa<FuncOp>(returnOp->getParentOp()) &&
"only support FuncOp parent for ReturnOp");
#endif // NDEBUG
// ReturnOps are bufferized as part of FuncOps.
return failure();
}
};
struct FuncOpInterface
: public BufferizableOpInterface::ExternalModel<FuncOpInterface, FuncOp> {
/// Rewrite function bbArgs and return values into buffer form (using the
/// canonical memref layout for now). This function bufferizes the function
/// signature and the ReturnOp. When the entire function body has been
/// bufferized, function return types can be switched to more concise memref
/// types as part of `foldMemRefCasts`.
///
/// When a tensor function argument is known to be equivalent to a tensor
/// result, it is dropped from the return values.
///
/// All function bbArgs are writable unless they are explicitly marked as
/// read-only. Callers must insert copies when needed.
///
/// Note: Returning a memref is possible, but corresponding CallOp
/// bufferizations fail unless `allowReturnAllocs`.
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
BufferizationState &state) const {
auto funcOp = cast<FuncOp>(op);
FunctionType funcType = funcOp.getFunctionType();
const FuncAnalysisState &funcState =
getFuncAnalysisState(state.getAnalysisState());
const BufferizationOptions &options = state.getOptions();
// Construct the bufferized function type.
SmallVector<Type> argTypes;
for (const auto &it : llvm::enumerate(funcType.getInputs())) {
Type argType = it.value();
if (auto tensorType = argType.dyn_cast<TensorType>()) {
argTypes.push_back(
getBufferizedFunctionArgType(funcOp, it.index(), options));
continue;
}
argTypes.push_back(argType);
}
// Bodiless functions are assumed opaque and we cannot know the
// bufferization contract they want to enforce. As a consequence, only
// support functions that don't return any tensors at the moment.
if (funcOp.getBody().empty()) {
SmallVector<Type> retTypes;
for (Type resultType : funcType.getResults()) {
if (resultType.isa<TensorType>())
return funcOp->emitError() << "cannot bufferize bodiless function "
<< "that returns a tensor";
retTypes.push_back(resultType);
}
funcOp.setType(FunctionType::get(op->getContext(), argTypes, retTypes));
return success();
}
// TODO: Support functions with multiple returns.
func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp);
assert(returnOp && "expected func with single return op");
// 1. Rewrite the bbArgs. Turn every tensor bbArg into a memref bbArg.
Block &frontBlock = funcOp.getBody().front();
for (BlockArgument &bbArg : frontBlock.getArguments()) {
auto tensorType = bbArg.getType().dyn_cast<TensorType>();
// Non-tensor types stay the same.
if (!tensorType)
continue;
// Collect all uses of the bbArg.
SmallVector<OpOperand *> bbArgUses;
for (OpOperand &use : bbArg.getUses())
bbArgUses.push_back(&use);
// Change the bbArg type to memref.
Type memrefType =
getBufferizedFunctionArgType(funcOp, bbArg.getArgNumber(), options);
bbArg.setType(memrefType);
// Replace all uses of the original tensor bbArg.
rewriter.setInsertionPointToStart(&frontBlock);
if (!bbArgUses.empty()) {
// Insert to_tensor because the remaining function body has not been
// bufferized yet.
Value toTensorOp =
rewriter.create<bufferization::ToTensorOp>(funcOp.getLoc(), bbArg);
for (OpOperand *use : bbArgUses)
use->set(toTensorOp);
}
}
// 2. For each result, keep track of which inplace argument it reuses.
SmallVector<Value> returnValues;
for (OpOperand &returnOperand : returnOp->getOpOperands()) {
Value returnVal = returnOperand.get();
// If not a tensor type just forward it.
if (!returnVal.getType().isa<RankedTensorType>()) {
returnValues.push_back(returnVal);
continue;
}
// If return operand is equivalent to some bbArg, no need to return it.
if (Optional<int64_t> equivBbArgIdx = getEquivalentFuncArgIdx(
funcOp, funcState, returnOperand.getOperandNumber())) {
rewriter.setInsertionPoint(returnOp);
Location loc = returnOp.getLoc();
Value toMemrefOp = rewriter.create<bufferization::ToMemrefOp>(
loc, getMemRefType(returnVal.getType().cast<TensorType>(), options),
returnVal);
BlockArgument equivBbArg = funcOp.getArgument(*equivBbArgIdx);
// Note: This copy will fold away. It must be inserted here to ensure
// that `returnVal` still has at least one use and does not fold away.
if (failed(
createMemCpy(rewriter, loc, toMemrefOp, equivBbArg, options)))
return funcOp->emitError("could not generate copy for bbArg");
continue;
}
returnValues.push_back(*state.getBuffer(rewriter, returnOperand));
}
// 3. Rewrite the terminator without the in-place bufferizable values.
returnOp.operandsMutable().assign(returnValues);
// 4. Rewrite the FuncOp type to buffer form.
funcOp.setType(FunctionType::get(op->getContext(), argTypes,
ValueRange(returnValues).getTypes()));
return success();
}
/// Return `true` if the given function argument is writable.
bool isWritable(Operation *op, Value value,
const AnalysisState &state) const {
auto funcOp = cast<FuncOp>(op);
BlockArgument bbArg = value.dyn_cast<BlockArgument>();
assert(bbArg && "expected BlockArgument");
// "bufferization.writable" overrides other writability decisions. This is
// currently used for testing only.
if (BoolAttr writable = funcOp.getArgAttrOfType<BoolAttr>(
bbArg.getArgNumber(), BufferizationDialect::kWritableAttrName))
return writable.getValue();
// All function arguments are writable by default.
return true;
}
bool isAllocationHoistingBarrier(Operation *op) const { return true; }
};
} // namespace func_ext
} // namespace bufferization
} // namespace mlir
void mlir::bufferization::func_ext::
registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry) {
registry.addExtension(+[](MLIRContext *ctx, func::FuncDialect *dialect) {
func::CallOp::attachInterface<func_ext::CallOpInterface>(*ctx);
func::FuncOp::attachInterface<func_ext::FuncOpInterface>(*ctx);
func::ReturnOp::attachInterface<func_ext::ReturnOpInterface>(*ctx);
});
}
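
An editorial sketch of the result-folding logic above (names and types are illustrative, layout maps omitted): a call whose tensor result was proven equivalent to a tensor operand loses that result after bufferization, and the callee writes through the operand's buffer instead.

```
// Before bufferization (analysis: the call's result #0 is equivalent to
// operand #0):
//   %r = call @callee(%t) : (tensor<?xf32>) -> tensor<?xf32>
//
// After bufferization, the equivalent tensor result is dropped and the
// operand's buffer is passed and written to in place:
//   call @callee(%m) : (memref<?xf32>) -> ()
```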

View File

@@ -0,0 +1,497 @@
//===- ModuleBufferization.cpp - Bufferization across Func. Boundaries ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Module Bufferization is an extension of One-Shot Bufferize that
// bufferizes function boundaries. It provides `BufferizableOpInterface`
// implementations for FuncOp, CallOp and ReturnOp.
//
// Module Bufferization is run via `runOneShotModuleBufferize(ModuleOp, ...)`.
// This function analyzes the given module and determines the order of analysis
// and bufferization: Functions that are called are processed before their
// respective callers.
//
// After analyzing a FuncOp, additional information about its bbArgs is
// gathered through PostAnalysisStepFns and stored in `FuncAnalysisState`.
//
// * `aliasingFuncOpBBArgsAnalysis` determines the equivalent/aliasing bbArgs
//   for each tensor return value (if any).
// * `funcOpBbArgReadWriteAnalysis` determines whether or not a tensor bbArg is
// read/written.
//
// Only tensors that are equivalent to some FuncOp bbArg may be returned.
// Bufferization currently fails if other tensors (in particular tensors that
// bufferize out-of-place and result in a new buffer allocation) are returned.
// In the future, such allocations could be hoisted to the caller.
//
// Example: `foo` fails bufferization because %0 is not equivalent to any bbArg.
// ```
// func @foo() -> tensor<?xf32> {
// %0 = linalg.init_tensor [...] : tensor<?xf32>
// return %0 : tensor<?xf32>
// }
// ```
//
// Module Bufferization implements the following calling convention.
//
// * In the absence of conflicts within a FuncOp, the FuncOp's bbArgs may always
// be written to in-place.
// * If a tensor operand of a CallOp is read after the CallOp, the operand of
// the CallOp must bufferize out-of-place.
//
// Example: The tensor.insert op bufferizes in-place because it is allowed to
// modify the buffer of `%t1` directly. The CallOp in `caller` must bufferize
// out-of-place because `%t0` is modified by the callee but read by the
// tensor.extract op. The analysis of CallOps decides whether an OpOperand must
// bufferize out-of-place based on results of `funcOpBbArgReadWriteAnalysis`.
// ```
// func @callee(%t1 : tensor<?xf32>) -> tensor<?xf32> {
// %f = ... : f32
// %0 = tensor.insert %f into %t1[...] : tensor<?xf32>
// return %0 : tensor<?xf32>
// }
//
// func @caller() -> () {
// %t0 = ... : tensor<?xf32>
// %1 = call @callee(%t0) : (tensor<?xf32>) -> (tensor<?xf32>)
// %2 = tensor.extract %1[...] : tensor<?xf32>
// }
// ```
//
// Note: If a function is external, `funcOpBbArgReadWriteAnalysis` cannot
// analyze the function body. In such a case, the CallOp analysis conservatively
// assumes that each tensor OpOperand is both read and written.
//
// TODO: Add FuncOp attributes so that bbArgs of external FuncOps can be marked
// as "not reading" and/or "not writing".
#include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h"
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
#include "mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/Operation.h"
using namespace mlir;
using namespace mlir::bufferization;
using namespace mlir::bufferization::func_ext;
/// A mapping of FuncOps to their callers.
using FuncCallerMap = DenseMap<func::FuncOp, DenseSet<Operation *>>;
/// Get FuncAnalysisState.
static const FuncAnalysisState &
getFuncAnalysisState(const AnalysisState &state) {
Optional<const FuncAnalysisState *> maybeState =
state.getDialectState<FuncAnalysisState>(
func::FuncDialect::getDialectNamespace());
assert(maybeState.hasValue() && "FuncAnalysisState does not exist");
return **maybeState;
}
/// Get or create FuncAnalysisState.
static FuncAnalysisState &getFuncAnalysisState(AnalysisState &state) {
return state.getOrCreateDialectState<FuncAnalysisState>(
func::FuncDialect::getDialectNamespace());
}
/// Return the state (phase) of analysis of the FuncOp.
static FuncOpAnalysisState getFuncOpAnalysisState(const AnalysisState &state,
func::FuncOp funcOp) {
const FuncAnalysisState &funcState = getFuncAnalysisState(state);
auto it = funcState.analyzedFuncOps.find(funcOp);
if (it == funcState.analyzedFuncOps.end())
return FuncOpAnalysisState::NotAnalyzed;
return it->second;
}
/// Return the unique ReturnOp that terminates `funcOp`.
/// Return nullptr if there is no such unique ReturnOp.
static func::ReturnOp getAssumedUniqueReturnOp(func::FuncOp funcOp) {
func::ReturnOp returnOp;
for (Block &b : funcOp.getBody()) {
if (auto candidateOp = dyn_cast<func::ReturnOp>(b.getTerminator())) {
if (returnOp)
return nullptr;
returnOp = candidateOp;
}
}
return returnOp;
}
namespace {
/// Annotate IR with the results of the analysis. For testing purposes only.
static void annotateEquivalentReturnBbArg(OpOperand &returnVal,
BlockArgument bbArg) {
const char *kEquivalentArgsAttr = "__equivalent_func_args__";
Operation *op = returnVal.getOwner();
SmallVector<int64_t> equivBbArgs;
if (op->hasAttr(kEquivalentArgsAttr)) {
auto attr = op->getAttr(kEquivalentArgsAttr).cast<ArrayAttr>();
equivBbArgs = llvm::to_vector<4>(llvm::map_range(attr, [](Attribute a) {
return a.cast<IntegerAttr>().getValue().getSExtValue();
}));
} else {
equivBbArgs.append(op->getNumOperands(), -1);
}
equivBbArgs[returnVal.getOperandNumber()] = bbArg.getArgNumber();
OpBuilder b(op->getContext());
op->setAttr(kEquivalentArgsAttr, b.getI64ArrayAttr(equivBbArgs));
}
/// Store function BlockArguments that are equivalent to/aliasing a returned
/// value in FuncAnalysisState.
static LogicalResult
aliasingFuncOpBBArgsAnalysis(Operation *op, AnalysisState &state,
BufferizationAliasInfo &aliasInfo,
SmallVector<Operation *> &newOps) {
FuncAnalysisState &funcState = getFuncAnalysisState(state);
// Support only single return-terminated block in the function.
auto funcOp = cast<func::FuncOp>(op);
func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp);
assert(returnOp && "expected func with single return op");
for (OpOperand &returnVal : returnOp->getOpOperands())
if (returnVal.get().getType().isa<RankedTensorType>())
for (BlockArgument bbArg : funcOp.getArguments())
if (bbArg.getType().isa<RankedTensorType>()) {
int64_t returnIdx = returnVal.getOperandNumber();
int64_t bbArgIdx = bbArg.getArgNumber();
if (aliasInfo.areEquivalentBufferizedValues(returnVal.get(), bbArg)) {
funcState.equivalentFuncArgs[funcOp][returnIdx] = bbArgIdx;
if (state.getOptions().testAnalysisOnly)
annotateEquivalentReturnBbArg(returnVal, bbArg);
}
if (aliasInfo.areAliasingBufferizedValues(returnVal.get(), bbArg)) {
funcState.aliasingFuncArgs[funcOp][returnIdx].push_back(bbArgIdx);
funcState.aliasingReturnVals[funcOp][bbArgIdx].push_back(returnIdx);
}
}
return success();
}
/// Return true if the buffer of the given tensor value is written to. Must not
/// be called for values inside not yet analyzed functions. (Post-analysis
/// steps do not have to be run yet, i.e., "in progress" is also OK.)
static bool isValueWritten(Value value, const AnalysisState &state,
const BufferizationAliasInfo &aliasInfo) {
#ifndef NDEBUG
assert(value.getType().isa<TensorType>() && "expected TensorType");
func::FuncOp funcOp;
if (auto bbArg = value.dyn_cast<BlockArgument>()) {
Operation *owner = bbArg.getOwner()->getParentOp();
funcOp = isa<func::FuncOp>(owner) ? cast<func::FuncOp>(owner)
: owner->getParentOfType<func::FuncOp>();
} else {
funcOp = value.getDefiningOp()->getParentOfType<func::FuncOp>();
}
assert(getFuncOpAnalysisState(state, funcOp) !=
FuncOpAnalysisState::NotAnalyzed &&
"FuncOp must be fully analyzed or analysis in progress");
#endif // NDEBUG
bool isWritten = false;
aliasInfo.applyOnAliases(value, [&](Value val) {
for (OpOperand &use : val.getUses())
if (state.isInPlace(use) && state.bufferizesToMemoryWrite(use))
isWritten = true;
});
return isWritten;
}
static void annotateFuncArgAccess(func::FuncOp funcOp, BlockArgument bbArg,
bool isRead, bool isWritten) {
OpBuilder b(funcOp.getContext());
Attribute accessType;
if (isRead && isWritten) {
accessType = b.getStringAttr("read-write");
} else if (isRead) {
accessType = b.getStringAttr("read");
} else if (isWritten) {
accessType = b.getStringAttr("write");
} else {
accessType = b.getStringAttr("none");
}
funcOp.setArgAttr(bbArg.getArgNumber(), "bufferization.access", accessType);
}
/// Determine which FuncOp bbArgs are read and which are written. If this
/// PostAnalysisStepFn is run on a function with unknown ops, it will
/// conservatively assume that such ops bufferize to a read + write.
static LogicalResult
funcOpBbArgReadWriteAnalysis(Operation *op, AnalysisState &state,
BufferizationAliasInfo &aliasInfo,
SmallVector<Operation *> &newOps) {
FuncAnalysisState &funcState = getFuncAnalysisState(state);
auto funcOp = cast<func::FuncOp>(op);
// If the function has no body, conservatively assume that all args are
// read + written.
if (funcOp.getBody().empty()) {
for (BlockArgument bbArg : funcOp.getArguments()) {
funcState.readBbArgs[funcOp].insert(bbArg.getArgNumber());
funcState.writtenBbArgs[funcOp].insert(bbArg.getArgNumber());
}
return success();
}
for (BlockArgument bbArg : funcOp.getArguments()) {
if (!bbArg.getType().isa<TensorType>())
continue;
bool isRead = state.isValueRead(bbArg);
bool isWritten = isValueWritten(bbArg, state, aliasInfo);
if (state.getOptions().testAnalysisOnly)
annotateFuncArgAccess(funcOp, bbArg, isRead, isWritten);
if (isRead)
funcState.readBbArgs[funcOp].insert(bbArg.getArgNumber());
if (isWritten)
funcState.writtenBbArgs[funcOp].insert(bbArg.getArgNumber());
}
return success();
}
} // namespace
/// Remove bufferization attributes on FuncOp arguments.
static void removeBufferizationAttributes(BlockArgument bbArg) {
auto funcOp = cast<func::FuncOp>(bbArg.getOwner()->getParentOp());
funcOp.removeArgAttr(bbArg.getArgNumber(),
BufferizationDialect::kBufferLayoutAttrName);
funcOp.removeArgAttr(bbArg.getArgNumber(),
BufferizationDialect::kWritableAttrName);
}
/// Return the func::FuncOp called by `callOp`.
static func::FuncOp getCalledFunction(CallOpInterface callOp) {
SymbolRefAttr sym = callOp.getCallableForCallee().dyn_cast<SymbolRefAttr>();
if (!sym)
return nullptr;
return dyn_cast_or_null<func::FuncOp>(
SymbolTable::lookupNearestSymbolFrom(callOp, sym));
}
/// Gather equivalence info of CallOps.
/// Note: This only adds new equivalence info if the called function was already
/// analyzed.
// TODO: This does not handle cyclic function call graphs etc.
static void equivalenceAnalysis(func::FuncOp funcOp,
BufferizationAliasInfo &aliasInfo,
FuncAnalysisState &funcState) {
funcOp->walk([&](func::CallOp callOp) {
func::FuncOp calledFunction = getCalledFunction(callOp);
assert(calledFunction && "could not retrieved called func::FuncOp");
// No equivalence info available for the called function.
if (!funcState.equivalentFuncArgs.count(calledFunction))
return WalkResult::skip();
for (auto it : funcState.equivalentFuncArgs[calledFunction]) {
int64_t returnIdx = it.first;
int64_t bbargIdx = it.second;
Value returnVal = callOp.getResult(returnIdx);
Value argVal = callOp->getOperand(bbargIdx);
aliasInfo.unionEquivalenceClasses(returnVal, argVal);
}
return WalkResult::advance();
});
}
/// Store all functions of the `moduleOp` in `orderedFuncOps`, sorted by
/// callee-caller order (i.e. callees without callers first).
/// Store the map of FuncOp to all its callers in `callerMap`.
/// Return `failure()` if a cycle of calls is detected or if we are unable to
/// retrieve the called FuncOp from any CallOpInterface.
static LogicalResult
getFuncOpsOrderedByCalls(ModuleOp moduleOp,
SmallVectorImpl<func::FuncOp> &orderedFuncOps,
FuncCallerMap &callerMap) {
// For each FuncOp, the set of functions called by it (i.e. the union of
// symbols of all nested CallOpInterface ops).
DenseMap<func::FuncOp, DenseSet<func::FuncOp>> calledBy;
// For each FuncOp, the number of CallOpInterface it contains.
DenseMap<func::FuncOp, unsigned> numberCallOpsContainedInFuncOp;
WalkResult res = moduleOp.walk([&](func::FuncOp funcOp) -> WalkResult {
if (!funcOp.getBody().empty()) {
func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp);
if (!returnOp)
return funcOp->emitError()
<< "cannot bufferize a FuncOp with tensors and "
"without a unique ReturnOp";
}
numberCallOpsContainedInFuncOp[funcOp] = 0;
return funcOp.walk([&](CallOpInterface callOp) -> WalkResult {
// Only support CallOp for now.
if (!isa<func::CallOp>(callOp.getOperation()))
return callOp->emitError() << "expected a CallOp";
func::FuncOp calledFunction = getCalledFunction(callOp);
assert(calledFunction && "could not retrieved called func::FuncOp");
auto it = callerMap.try_emplace(calledFunction, DenseSet<Operation *>{});
it.first->getSecond().insert(callOp);
if (calledBy[calledFunction].count(funcOp) == 0) {
calledBy[calledFunction].insert(funcOp);
numberCallOpsContainedInFuncOp[funcOp]++;
}
return WalkResult::advance();
});
});
if (res.wasInterrupted())
return failure();
// Iteratively remove function operations that do not call any of the
// functions remaining in the callCounter map and add them to the worklist.
while (!numberCallOpsContainedInFuncOp.empty()) {
auto it = llvm::find_if(numberCallOpsContainedInFuncOp,
[](auto entry) { return entry.getSecond() == 0; });
if (it == numberCallOpsContainedInFuncOp.end())
return moduleOp.emitOpError(
"expected callgraph to be free of circular dependencies.");
orderedFuncOps.push_back(it->getFirst());
for (auto callee : calledBy[it->getFirst()])
numberCallOpsContainedInFuncOp[callee]--;
numberCallOpsContainedInFuncOp.erase(it);
}
return success();
}
/// Set the attribute that triggers inplace bufferization on a FuncOp argument
/// `bbArg`.
static void setInPlaceFuncArgument(BlockArgument bbArg, bool inPlace) {
auto funcOp = cast<func::FuncOp>(bbArg.getOwner()->getParentOp());
funcOp.setArgAttr(bbArg.getArgNumber(),
BufferizableOpInterface::kInplaceableAttrName,
BoolAttr::get(bbArg.getContext(), inPlace));
}
/// Annotate the IR with the result of the analysis. For testing/debugging only.
static void annotateOpsWithBufferizationMarkers(func::FuncOp funcOp,
const AnalysisState &state) {
auto bufferizableOp = cast<BufferizableOpInterface>(funcOp.getOperation());
for (BlockArgument bbArg : funcOp.getArguments())
if (bbArg.getType().isa<TensorType>())
setInPlaceFuncArgument(bbArg, bufferizableOp.isWritable(bbArg, state));
}
/// Fold return values that are memref casts and update function return types.
///
/// During FuncOp bufferization, the exact type of the returned memrefs (if any)
/// is not known yet. Therefore, the bufferization uses memref types with the
/// most generic layout map as function return types. After bufferizing the
/// entire function body, a more concise memref type can potentially be used for
/// the return type of the function.
static void foldMemRefCasts(func::FuncOp funcOp) {
if (funcOp.getBody().empty())
return;
func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp);
SmallVector<Type> resultTypes;
for (OpOperand &operand : returnOp->getOpOperands()) {
if (auto castOp = operand.get().getDefiningOp<memref::CastOp>()) {
operand.set(castOp.source());
resultTypes.push_back(castOp.source().getType());
} else {
resultTypes.push_back(operand.get().getType());
}
}
auto newFuncType = FunctionType::get(
funcOp.getContext(), funcOp.getFunctionType().getInputs(), resultTypes);
funcOp.setType(newFuncType);
}
LogicalResult mlir::bufferization::runOneShotModuleBufferize(
ModuleOp moduleOp, OneShotBufferizationOptions options) {
IRRewriter rewriter(moduleOp.getContext());
OneShotAnalysisState analysisState(moduleOp, options);
BufferizationState bufferizationState(analysisState);
FuncAnalysisState &funcState = getFuncAnalysisState(analysisState);
BufferizationAliasInfo &aliasInfo = analysisState.getAliasInfo();
// A list of functions in the order in which they are analyzed + bufferized.
SmallVector<func::FuncOp> orderedFuncOps;
// A mapping of FuncOps to their callers.
FuncCallerMap callerMap;
if (failed(getFuncOpsOrderedByCalls(moduleOp, orderedFuncOps, callerMap)))
return failure();
// Collect bbArg/return value information after the analysis.
options.addPostAnalysisStep(aliasingFuncOpBBArgsAnalysis);
options.addPostAnalysisStep(funcOpBbArgReadWriteAnalysis);
// Analyze ops.
for (func::FuncOp funcOp : orderedFuncOps) {
// No body => no analysis.
if (funcOp.getBody().empty())
continue;
// Now analyzing function.
funcState.startFunctionAnalysis(funcOp);
// Gather equivalence info for CallOps.
equivalenceAnalysis(funcOp, aliasInfo, funcState);
// Analyze funcOp.
if (failed(analyzeOp(funcOp, analysisState)))
return failure();
// Mark op as fully analyzed.
funcState.analyzedFuncOps[funcOp] = FuncOpAnalysisState::Analyzed;
// Add annotations to function arguments.
if (options.testAnalysisOnly)
annotateOpsWithBufferizationMarkers(funcOp, analysisState);
}
if (options.testAnalysisOnly)
return success();
// Bufferize functions.
for (func::FuncOp funcOp : orderedFuncOps) {
// Note: It would be good to apply cleanups here but we cannot as aliasInfo
// would be invalidated.
if (failed(bufferizeOp(funcOp, bufferizationState)))
return failure();
foldMemRefCasts(funcOp);
}
// Check result.
for (func::FuncOp funcOp : orderedFuncOps) {
if (!options.allowReturnAllocs &&
llvm::any_of(funcOp.getFunctionType().getResults(), [](Type t) {
return t.isa<MemRefType, UnrankedMemRefType>();
})) {
funcOp->emitError("memref return type is unsupported");
return failure();
}
}
// Finalize all buffers.
if (failed(finalizeBuffers(moduleOp, options)))
return failure();
// Post-pass cleanup of function argument attributes.
moduleOp.walk([&](func::FuncOp op) {
for (BlockArgument bbArg : op.getArguments())
removeBufferizationAttributes(bbArg);
});
return success();
}
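
For reference, an editorial sketch of the markers emitted under `test-analysis-only` by the two post-analysis steps and `annotateOpsWithBufferizationMarkers` above (attribute values are illustrative, the function is made up):

```
func.func @callee(%t: tensor<?xf32> {bufferization.access = "write",
                                     linalg.inplaceable = true},
                  %f: f32) -> tensor<?xf32> {
  %0 = linalg.fill ins(%f : f32) outs(%t : tensor<?xf32>) -> tensor<?xf32>
  // Return operand #0 was found equivalent to bbArg #0.
  return {__equivalent_func_args__ = [0]} %0 : tensor<?xf32>
}
```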

View File

@@ -1,5 +1,4 @@
add_subdirectory(Analysis)
add_subdirectory(ComprehensiveBufferize)
add_subdirectory(IR)
add_subdirectory(Transforms)
add_subdirectory(Utils)

View File

@@ -1,11 +0,0 @@
add_mlir_dialect_library(MLIRModuleBufferization
ModuleBufferization.cpp
LINK_LIBS PUBLIC
MLIRBufferization
MLIRBufferizationTransforms
MLIRFunc
MLIRFuncTransforms
MLIRIR
MLIRMemRef
)

View File

@@ -133,17 +133,6 @@ LogicalResult LinalgDialect::verifyOperationAttribute(Operation *op,
<< " to be used on function-like operations";
return success();
}
if (attr.getName() == BufferizableOpInterface::kBufferLayoutAttrName) {
if (!attr.getValue().isa<AffineMapAttr>()) {
return op->emitError()
<< "'" << BufferizableOpInterface::kBufferLayoutAttrName
<< "' is expected to be a affine map attribute";
}
if (!isa<FunctionOpInterface>(op))
return op->emitError() << "expected " << attr.getName()
<< " to be used on function-like operations";
return success();
}
if (attr.getName() == LinalgDialect::kMemoizedIndexingMapsAttrName)
return success();
return op->emitError() << "attribute '" << attr.getName()

View File

@@ -40,6 +40,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms
MLIRArithmetic
MLIRArithmeticTransforms
MLIRBufferization
MLIRBufferizationTransforms
MLIRComplex
MLIRFunc
MLIRFuncToLLVM
@@ -50,7 +51,6 @@ add_mlir_dialect_library(MLIRLinalgTransforms
MLIRLinalg
MLIRLinalgAnalysis
MLIRLinalgUtils
MLIRModuleBufferization
MLIRSCF
MLIRSCFTransforms
MLIRSCFUtils

View File

@@ -11,10 +11,11 @@
#include "mlir/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/SCF/BufferizableOpInterfaceImpl.h"
@@ -28,7 +29,6 @@
using namespace mlir;
using namespace mlir::bufferization;
using namespace mlir::linalg;
using namespace mlir::linalg::comprehensive_bufferize;
namespace {
struct LinalgComprehensiveModuleBufferize
@@ -55,7 +55,7 @@ struct LinalgComprehensiveModuleBufferize
bufferization::registerAllocationOpInterfaceExternalModels(registry);
linalg::registerBufferizableOpInterfaceExternalModels(registry);
scf::registerBufferizableOpInterfaceExternalModels(registry);
std_ext::registerModuleBufferizationExternalModels(registry);
func_ext::registerBufferizableOpInterfaceExternalModels(registry);
tensor::registerBufferizableOpInterfaceExternalModels(registry);
vector::registerBufferizableOpInterfaceExternalModels(registry);
}
@@ -109,7 +109,7 @@ void LinalgComprehensiveModuleBufferize::runOnOperation() {
ModuleOp moduleOp = getOperation();
applyEnablingTransformations(moduleOp);
if (failed(runModuleBufferize(moduleOp, opt))) {
if (failed(runOneShotModuleBufferize(moduleOp, opt))) {
signalPassFailure();
return;
}

View File

@@ -204,8 +204,8 @@ func.func @simple_tensor_test(%t1 : tensor<?xf32>, %f : f32) -> tensor<?xf32> {
// -----
// CHECK-SCF-LABEL: func @simple_scf_if(
// CHECK-SCF-SAME: %[[t1:.*]]: tensor<?xf32> {linalg.inplaceable = true}, %[[c:.*]]: i1, %[[pos:.*]]: index
func.func @simple_scf_if(%t1: tensor<?xf32> {linalg.inplaceable = true}, %c: i1, %pos: index, %f: f32)
// CHECK-SCF-SAME: %[[t1:.*]]: tensor<?xf32> {bufferization.writable = true}, %[[c:.*]]: i1, %[[pos:.*]]: index
func.func @simple_scf_if(%t1: tensor<?xf32> {bufferization.writable = true}, %c: i1, %pos: index, %f: f32)
-> (tensor<?xf32>, index) {
// CHECK-SCF: %[[r:.*]] = scf.if %[[c]] -> (memref<?xf32, #{{.*}}>) {
%r1, %r2 = scf.if %c -> (tensor<?xf32>, index) {

View File

@@ -1,12 +1,12 @@
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize=allow-return-allocs -split-input-file | FileCheck %s
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs" -split-input-file | FileCheck %s
// Run fuzzer with different seeds.
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs fully-dynamic-layout-maps=0" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs fully-dynamic-layout-maps=0" -split-input-file -o /dev/null
// Make sure that the returned buffer is not deallocated.
// TODO: Such buffers currently leak. We need buffer hoisting / ref counting for

View File

@@ -1,9 +1,12 @@
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s
// Run fuzzer with different seeds.
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="test-analysis-only allow-return-allocs analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="test-analysis-only allow-return-allocs analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="test-analysis-only allow-return-allocs analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// TODO: Extract op-specific test cases and move them to their respective
// dialects.
//===----------------------------------------------------------------------===//
// Simple cases
@@ -12,10 +15,10 @@
// -----
// CHECK-LABEL: func @extract_slice_fun(
func.func @extract_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = false},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<?xf32> {linalg.inplaceable = true})
// CHECK-SAME: bufferization.access = "read"
func.func @extract_slice_fun(%A : tensor<?xf32> {bufferization.writable = false},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "read"
-> (tensor<4xf32>, tensor<8xf32>)
{
// tensor.extract_slice is not used in a write, it is not compelled to
@@ -36,12 +39,12 @@ func.func @extract_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = false},
// -----
// CHECK-LABEL: func @insert_slice_fun(
func.func @insert_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = false},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<?xf32> {linalg.inplaceable = true},
// CHECK-SAME: bufferization.access = "read-write"
%C : tensor<4xf32> {linalg.inplaceable = false})
// CHECK-SAME: bufferization.access = "read"
func.func @insert_slice_fun(%A : tensor<?xf32> {bufferization.writable = false},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read-write"
%C : tensor<4xf32> {bufferization.writable = false})
// CHECK-SAME: bufferization.access = "read"
-> (tensor<?xf32>, tensor<?xf32>)
{
// must bufferize out of place.
@@ -62,10 +65,10 @@ func.func @insert_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = false},
// -----
// CHECK-LABEL: func @conflict_on_B(
func.func @conflict_on_B(%A : tensor<4x4xf32> {linalg.inplaceable = true},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<4x4xf32> {linalg.inplaceable = true})
// CHECK-SAME: bufferization.access = "read-write"
func.func @conflict_on_B(%A : tensor<4x4xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<4x4xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "read-write"
-> (tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>)
{
// matmul output operand interferes with input operand.
@@ -102,9 +105,9 @@ func.func @conflict_on_B(%A : tensor<4x4xf32> {linalg.inplaceable = true},
// CHECK-LABEL: func @extract_slice_extract_slice(
func.func @extract_slice_extract_slice(
%A : tensor<?xf32> {linalg.inplaceable = true},
%A : tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<?xf32> {linalg.inplaceable = false})
%B : tensor<?xf32> {bufferization.writable = false})
// CHECK-SAME: bufferization.access = "read"
-> (tensor<2xf32>, tensor<2xf32>)
{
@@ -131,17 +134,17 @@ func.func @extract_slice_extract_slice(
// CHECK-LABEL: func @insert_slice_insert_slice(
func.func @insert_slice_insert_slice(
%A : tensor<?xf32> {linalg.inplaceable = true},
%A : tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read-write"
%A2 : tensor<4xf32> {linalg.inplaceable = true},
%A2 : tensor<4xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read-write"
%A3 : tensor<2xf32> {linalg.inplaceable = true},
%A3 : tensor<2xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<?xf32> {linalg.inplaceable = false},
%B : tensor<?xf32> {bufferization.writable = false},
// CHECK-SAME: bufferization.access = "read"
%B2 : tensor<4xf32> {linalg.inplaceable = false},
%B2 : tensor<4xf32> {bufferization.writable = false},
// CHECK-SAME: bufferization.access = "read"
%B3 : tensor<2xf32> {linalg.inplaceable = false})
%B3 : tensor<2xf32> {bufferization.writable = false})
// CHECK-SAME: bufferization.access = "read"
-> (tensor<?xf32>, tensor<?xf32>)
{
@@ -166,8 +169,8 @@ func.func @insert_slice_insert_slice(
// CHECK-LABEL: func @extract_slice_nonmatching_insert_slice
func.func @extract_slice_nonmatching_insert_slice(
%A : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32> {linalg.inplaceable = false},
%A : tensor<?xf32> {bufferization.writable = true},
%B : tensor<?xf32> {bufferization.writable = false},
%idx: index)
-> (tensor<?xf32>, tensor<?xf32>)
{
@@ -205,8 +208,8 @@ func.func @extract_slice_nonmatching_insert_slice(
// CHECK-LABEL: func @extract_slice_matching_insert_slice
func.func @extract_slice_matching_insert_slice(
%A : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32> {linalg.inplaceable = false})
%A : tensor<?xf32> {bufferization.writable = true},
%B : tensor<?xf32> {bufferization.writable = false})
-> (tensor<?xf32>, tensor<?xf32>)
{
// %r1 bufferizes inplace because %A is inplaceable.
@@ -243,7 +246,7 @@ func.func @extract_slice_matching_insert_slice(
// CHECK-LABEL: @read_of_matching_insert_slice_source
func.func @read_of_matching_insert_slice_source(
%A : tensor<?xf32> {linalg.inplaceable = true},
%A : tensor<?xf32> {bufferization.writable = true},
%idx : index,
%idx2 : index)
-> (tensor<?xf32>, vector<5xf32>)
@@ -274,7 +277,7 @@ func.func @read_of_matching_insert_slice_source(
// CHECK-LABEL: @read_of_matching_insert_slice_source_interleaved
func.func @read_of_matching_insert_slice_source_interleaved(
%A : tensor<?xf32> {linalg.inplaceable = true},
%A : tensor<?xf32> {bufferization.writable = true},
%idx : index,
%idx2 : index,
%idx3 : index)
@@ -318,9 +321,9 @@ func.func @read_of_matching_insert_slice_source_interleaved(
// CHECK-LABEL: func @extract_slice_linalg_readonly_use
func.func @extract_slice_linalg_readonly_use(
%A : tensor<?x?xf32> {linalg.inplaceable = false},
%B : tensor<4x4xf32> {linalg.inplaceable = false},
%C : tensor<4x4xf32> {linalg.inplaceable = true})
%A : tensor<?x?xf32> {bufferization.writable = false},
%B : tensor<4x4xf32> {bufferization.writable = false},
%C : tensor<4x4xf32> {bufferization.writable = true})
-> (tensor<4x4xf32>, tensor<4x4xf32>)
{
// tensor.extract_slice is only used as a read, no interference irrespective
@@ -352,9 +355,9 @@ func.func @extract_slice_linalg_readonly_use(
// CHECK-LABEL: func @extract_slice_to_linalg_write_use
func.func @extract_slice_to_linalg_write_use(
%A : tensor<4x4xf32> {linalg.inplaceable = false},
%B : tensor<?x?xf32> {linalg.inplaceable = false},
%C : tensor<?x?xf32> {linalg.inplaceable = true})
%A : tensor<4x4xf32> {bufferization.writable = false},
%B : tensor<?x?xf32> {bufferization.writable = false},
%C : tensor<?x?xf32> {bufferization.writable = true})
-> (tensor<4x4xf32>, tensor<4x4xf32>)
{
// Step 4. %sB forward propagates to a write in %D but it is not inplace.
@@ -396,9 +399,9 @@ func.func @insert_slice_double_extract_slice(
%s2: index,
%s3: index,
%s4: index,
%A: tensor<8x6xf32> {linalg.inplaceable = false},
%B: tensor<6x6xf32> {linalg.inplaceable = false},
%C: tensor<30x20xf32> {linalg.inplaceable = true})
%A: tensor<8x6xf32> {bufferization.writable = false},
%B: tensor<6x6xf32> {bufferization.writable = false},
%C: tensor<30x20xf32> {bufferization.writable = true})
-> tensor<30x20xf32>
{
// CHECK: tensor.extract_slice
@@ -430,9 +433,9 @@ func.func @insert_slice_double_extract_slice(
// CHECK-LABEL: func @extract_slice_to_linalg_write_use
func.func @extract_slice_to_linalg_write_use(
%A : tensor<4x4xf32> {linalg.inplaceable = false},
%B : tensor<?x?xf32> {linalg.inplaceable = false},
%C : tensor<?x?xf32> {linalg.inplaceable = true})
%A : tensor<4x4xf32> {bufferization.writable = false},
%B : tensor<?x?xf32> {bufferization.writable = false},
%C : tensor<?x?xf32> {bufferization.writable = true})
-> (tensor<4x4xf32>, tensor<4x4xf32>)
{
// Step 4. %sB forward propagates to an inplace write in %D.
@@ -472,9 +475,9 @@ func.func @extract_slice_to_linalg_write_use(
// CHECK-LABEL: func @nested_extract_slice_and_insert
func.func @nested_extract_slice_and_insert(
%A : tensor<?x?xf32> {linalg.inplaceable = false},
%B : tensor<?x?xf32> {linalg.inplaceable = true},
%C : tensor<?x?xf32> {linalg.inplaceable = true},
%A : tensor<?x?xf32> {bufferization.writable = false},
%B : tensor<?x?xf32> {bufferization.writable = true},
%C : tensor<?x?xf32> {bufferization.writable = true},
%idx : index,
%sz1 : index,
%sz2 : index)
@@ -564,8 +567,8 @@ func.func @nested_extract_slice_and_insert(
// CHECK-LABEL: func @scf_for_yield_only
func.func @scf_for_yield_only(
%A : tensor<?xf32> {linalg.inplaceable = false},
%B : tensor<?xf32> {linalg.inplaceable = true},
%A : tensor<?xf32> {bufferization.writable = false},
%B : tensor<?xf32> {bufferization.writable = true},
%lb : index,
%ub : index,
%step : index)
@@ -596,9 +599,9 @@ func.func @scf_for_yield_only(
// CHECK-LABEL: func @scf_for_with_tensor.insert_slice
func.func @scf_for_with_tensor.insert_slice(
%A : tensor<?xf32> {linalg.inplaceable = false},
%B : tensor<?xf32> {linalg.inplaceable = true},
%C : tensor<4xf32> {linalg.inplaceable = false},
%A : tensor<?xf32> {bufferization.writable = false},
%B : tensor<?xf32> {bufferization.writable = true},
%C : tensor<4xf32> {bufferization.writable = false},
%lb : index,
%ub : index,
%step : index)
@@ -634,8 +637,8 @@ func.func private @some_use(tensor<?xf32>) -> ()
// CHECK-LABEL: func @scf_for_deps
func.func @scf_for_deps(
%A : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32> {linalg.inplaceable = true},
%A : tensor<?xf32> {bufferization.writable = true},
%B : tensor<?xf32> {bufferization.writable = true},
%lb : index,
%ub : index,
%step : index)
@@ -680,7 +683,7 @@ func.func @scf_for_deps(
func.func private @foo(tensor<64xf32>)
// CHECK-LABEL: dependence_through_call
func.func @dependence_through_call(%I : tensor<64xf32> {linalg.inplaceable = true}) {
func.func @dependence_through_call(%I : tensor<64xf32> {bufferization.writable = true}) {
%f1 = arith.constant 1.000000e+00 : f32
%f2 = arith.constant 2.000000e+00 : f32
@@ -712,8 +715,8 @@ func.func private @bar(%A : tensor<64xf32>) {
}
func.func @read_dependence_through_scf_and_call(
%I : tensor<64xf32> {linalg.inplaceable = true},
%I2 : tensor<64xf32> {linalg.inplaceable = true}) {
%I : tensor<64xf32> {bufferization.writable = true},
%I2 : tensor<64xf32> {bufferization.writable = true}) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
@@ -784,10 +787,10 @@ func.func @write_into_constant_via_alias(%v : vector<5xi32>,
// -----
func.func @matmul_on_tensors(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
func @matmul_on_tensors(
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -822,10 +825,10 @@ func.func @matmul_on_tensors(
// -----
func.func @matmul_on_tensors(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
func @matmul_on_tensors(
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -878,11 +881,11 @@ func.func @matmul_on_tensors(
func.func @insert_slice_chain(
%v1: vector<32x90xf32>,
%v2: vector<30x90xf32>,
%arg0: tensor<62x126xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg0: tensor<62x126xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
// CHECK-SAME: bufferization.access = "none"
%arg1: tensor<126x90xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<126x90xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
// CHECK-SAME: bufferization.access = "none"
%arg2: tensor<62x90xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg2: tensor<62x90xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
// CHECK-SAME: bufferization.access = "write"
-> tensor<62x90xf32> attributes {passthrough = [["target-cpu", "skylake-avx512"], ["prefer-vector-width", "512"]]}
{
@@ -926,7 +929,7 @@ func.func @insert_slice_chain(
// Only test IR validity wrt dominance.
// CHECK-LABEL: func @ip
func.func @ip(%t: tensor<10x20xf32> {linalg.inplaceable = true},
func.func @ip(%t: tensor<10x20xf32> {bufferization.writable = true},
%x: index, %y: index, %v: vector<5x6xf32>)
-> tensor<10x20xf32>
{
@@ -960,9 +963,9 @@ func.func @ip(%t: tensor<10x20xf32> {linalg.inplaceable = true},
// CHECK-LABEL: func @linalg_op_same_out_tensors(
func.func @linalg_op_same_out_tensors(
%t1: tensor<?xf32> {linalg.inplaceable = true},
%t1: tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
%t2: tensor<?xf32> {linalg.inplaceable = true})
%t2: tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "write"
-> (tensor<?xf32>, tensor<?xf32>){
@@ -994,9 +997,9 @@ func.func @linalg_op_same_out_tensors(
// CHECK-LABEL: func @linalg_op_same_out_tensors_2(
func.func @linalg_op_same_out_tensors_2(
%t1: tensor<?xf32> {linalg.inplaceable = true},
%t1: tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
%t2: tensor<?xf32> {linalg.inplaceable = true})
%t2: tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "write"
-> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>){
@@ -1020,7 +1023,7 @@ func.func @linalg_op_same_out_tensors_2(
func.func @double_insert_slice_into_alias(
%v1: vector<32x90xf32>,
%v2: vector<30x90xf32>,
%arg2: tensor<62x90xf32> {linalg.inplaceable = true},
%arg2: tensor<62x90xf32> {bufferization.writable = true},
%s1: index, %s2: index, %s3: index, %s4: index)
-> (tensor<62x90xf32>, tensor<?x?xf32>)
{
@@ -1061,7 +1064,7 @@ func.func @double_insert_slice_into_alias(
// CHECK-LABEL: func @interleaved_extract_insert_slice_chain_1
func.func @interleaved_extract_insert_slice_chain_1(
%arg2: tensor<62x90xf32> {linalg.inplaceable = true})
%arg2: tensor<62x90xf32> {bufferization.writable = true})
-> (tensor<62x90xf32>)
{
// CHECK: tensor.extract_slice
@@ -1092,7 +1095,7 @@ func.func @interleaved_extract_insert_slice_chain_1(
// CHECK-LABEL: func @interleaved_extract_insert_slice_chain_2
func.func @interleaved_extract_insert_slice_chain_2(
%arg2: tensor<62x90xf32> {linalg.inplaceable = true})
%arg2: tensor<62x90xf32> {bufferization.writable = true})
-> (tensor<62x90xf32>)
{
// CHECK: tensor.extract_slice
@@ -1123,7 +1126,7 @@ func.func @interleaved_extract_insert_slice_chain_2(
// CHECK-LABEL: func @extract_once_insert_twice
func.func @extract_once_insert_twice(
%arg2: tensor<62x90xf32> {linalg.inplaceable = true})
%arg2: tensor<62x90xf32> {bufferization.writable = true})
-> (tensor<62x90xf32>)
{
// CHECK: tensor.extract_slice
@@ -1154,8 +1157,8 @@ func.func @extract_once_insert_twice(
}
// CHECK-LABEL: func @reading_scf_for
func.func @reading_scf_for(%t1: tensor<?xf32> {linalg.inplaceable = true},
%s: index, %v: vector<5xf32>) -> (tensor<?xf32>, vector<5xf32>) {
func.func @reading_scf_for(%t1: tensor<?xf32> {bufferization.writable = true},
%s: index, %v: vector<5xf32>) -> (tensor<?xf32>, vector<5xf32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -1201,8 +1204,8 @@ func.func @reading_scf_for(%t1: tensor<?xf32> {linalg.inplaceable = true},
}
// CHECK-LABEL: func @non_reading_scf_for
func.func @non_reading_scf_for(%t1: tensor<?xf32> {linalg.inplaceable = true},
%s: index, %v: vector<5xf32>) -> (tensor<?xf32>, vector<5xf32>) {
func.func @non_reading_scf_for(%t1: tensor<?xf32> {bufferization.writable = true},
%s: index, %v: vector<5xf32>) -> (tensor<?xf32>, vector<5xf32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -1250,9 +1253,9 @@ func.func @non_reading_scf_for(%t1: tensor<?xf32> {linalg.inplaceable = true},
// This example passes analysis, but it fails when bufferizing.
// CHECK-LABEL: func @scf_if_inplace1
func.func @scf_if_inplace1(%t1: tensor<?xf32> {linalg.inplaceable = true},
%t2: tensor<?xf32> {linalg.inplaceable = true},
%cond: i1) -> tensor<?xf32> {
func.func @scf_if_inplace1(%t1: tensor<?xf32> {bufferization.writable = true},
%t2: tensor<?xf32> {bufferization.writable = true},
%cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
@@ -1268,9 +1271,9 @@ func.func @scf_if_inplace1(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_inplace2
func.func @scf_if_inplace2(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1) -> tensor<?xf32> {
func.func @scf_if_inplace2(%t1: tensor<?xf32> {bufferization.writable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
@@ -1289,9 +1292,9 @@ func.func @scf_if_inplace2(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_inplace3
func.func @scf_if_inplace3(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index,
%cond: i1) -> tensor<?xf32> {
func.func @scf_if_inplace3(%t1: tensor<?xf32> {bufferization.writable = true},
%v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index,
%cond: i1) -> tensor<?xf32> {
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
%e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
@@ -1317,9 +1320,9 @@ func.func @scf_if_inplace3(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_in_place4
func.func @scf_if_in_place4(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1, %cond2: i1) -> (tensor<?xf32>, vector<10xf32>) {
func.func @scf_if_in_place4(%t1: tensor<?xf32> {bufferization.writable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1, %cond2: i1) -> (tensor<?xf32>, vector<10xf32>) {
%cst = arith.constant 0.0 : f32
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: scf.yield
@@ -1353,8 +1356,8 @@ func.func @scf_if_in_place4(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_inplace5
func.func @scf_if_inplace5(%t1: tensor<?xf32> {linalg.inplaceable = true},
%idx: index, %cond: i1) -> tensor<?xf32> {
func.func @scf_if_inplace5(%t1: tensor<?xf32> {bufferization.writable = true},
%idx: index, %cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
@@ -1385,10 +1388,10 @@ func.func @scf_if_inplace5(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_inplace6
func.func @scf_if_inplace6(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v1: vector<5xf32>, %v2: vector<5xf32>,
%v3: vector<5xf32>, %idx: index,
%cond: i1, %cond2: i1) -> tensor<?xf32> {
func.func @scf_if_inplace6(%t1: tensor<?xf32> {bufferization.writable = true},
%v1: vector<5xf32>, %v2: vector<5xf32>,
%v3: vector<5xf32>, %idx: index,
%cond: i1, %cond2: i1) -> tensor<?xf32> {
// Test nested scf.if ops.
%r = scf.if %cond -> (tensor<?xf32>) {
%t2 = scf.if %cond2 -> (tensor<?xf32>) {
@@ -1426,9 +1429,9 @@ func.func @scf_if_inplace6(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_inplace7
func.func @scf_if_inplace7(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index,
%idx2: index, %cond: i1) -> (tensor<?xf32>, vector<5xf32>) {
func.func @scf_if_inplace7(%t1: tensor<?xf32> {bufferization.writable = true},
%v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index,
%idx2: index, %cond: i1) -> (tensor<?xf32>, vector<5xf32>) {
%cst = arith.constant 0.0 : f32
%r, %v_r2 = scf.if %cond -> (tensor<?xf32>, vector<5xf32>) {
// CHECK: vector.transfer_write
@@ -1456,9 +1459,9 @@ func.func @scf_if_inplace7(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_out_of_place1a
func.func @scf_if_out_of_place1a(%t1: tensor<?xf32> {linalg.inplaceable = true},
%idx: index, %idx2: index,
%cond: i1) -> tensor<?xf32> {
func.func @scf_if_out_of_place1a(%t1: tensor<?xf32> {bufferization.writable = true},
%idx: index, %idx2: index,
%cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
@@ -1483,9 +1486,9 @@ func.func @scf_if_out_of_place1a(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_out_of_place1b
func.func @scf_if_out_of_place1b(%t1: tensor<?xf32> {linalg.inplaceable = true},
%idx: index, %idx2: index, %idx3: index,
%cond: i1) -> tensor<?xf32> {
func.func @scf_if_out_of_place1b(%t1: tensor<?xf32> {bufferization.writable = true},
%idx: index, %idx2: index, %idx3: index,
%cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
@@ -1519,8 +1522,8 @@ func.func @scf_if_out_of_place1b(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_out_of_place1c
func.func @scf_if_out_of_place1c(%t1: tensor<?xf32> {linalg.inplaceable = true},
%idx: index, %idx2: index, %cond: i1) -> tensor<?xf32> {
func.func @scf_if_out_of_place1c(%t1: tensor<?xf32> {bufferization.writable = true},
%idx: index, %idx2: index, %cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
@@ -1550,9 +1553,9 @@ func.func @scf_if_out_of_place1c(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_out_of_place2
func.func @scf_if_out_of_place2(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1) -> (tensor<?xf32>, vector<10xf32>) {
func.func @scf_if_out_of_place2(%t1: tensor<?xf32> {bufferization.writable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1) -> (tensor<?xf32>, vector<10xf32>) {
%cst = arith.constant 0.0 : f32
%r = scf.if %cond -> (tensor<?xf32>) {
scf.yield %t1 : tensor<?xf32>
@@ -1574,9 +1577,9 @@ func.func @scf_if_out_of_place2(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_out_of_place3
func.func @scf_if_out_of_place3(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1, %cond2: i1) -> (tensor<?xf32>, vector<10xf32>) {
func.func @scf_if_out_of_place3(%t1: tensor<?xf32> {bufferization.writable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1, %cond2: i1) -> (tensor<?xf32>, vector<10xf32>) {
%cst = arith.constant 0.0 : f32
%r = scf.if %cond -> (tensor<?xf32>) {
scf.yield %t1 : tensor<?xf32>
@@ -1605,8 +1608,8 @@ func.func @scf_if_out_of_place3(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @some_use
func.func @some_use(%A : tensor<?xf32> {linalg.inplaceable = true},
%v : vector<5xf32>) -> (tensor<?xf32>) {
func.func @some_use(%A : tensor<?xf32> {bufferization.writable = true},
%v : vector<5xf32>) -> (tensor<?xf32>) {
%idx = arith.constant 0 : index
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
@@ -1616,8 +1619,8 @@ func.func @some_use(%A : tensor<?xf32> {linalg.inplaceable = true},
// CHECK-LABEL: func @main_func
func.func @main_func(%A : tensor<?xf32> {linalg.inplaceable = true},
%v : vector<5xf32>) -> (tensor<?xf32>) {
func.func @main_func(%A : tensor<?xf32> {bufferization.writable = true},
%v : vector<5xf32>) -> (tensor<?xf32>) {
// CHECK: call
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]
%0 = call @some_use(%A, %v) : (tensor<?xf32>, vector<5xf32>) -> (tensor<?xf32>)
@@ -1646,9 +1649,9 @@ func.func @to_tensor_op_not_writable(%m: memref<?xf32>, %v: vector<5xf32>,
// -----
// CHECK-LABEL: func @to_memref_op_is_reading
func.func @to_memref_op_is_reading(%t1: tensor<?xf32> {linalg.inplaceable = true},
%idx1: index, %idx2: index, %idx3: index,
%v1: vector<5xf32>)
func.func @to_memref_op_is_reading(%t1: tensor<?xf32> {bufferization.writable = true},
%idx1: index, %idx2: index, %idx3: index,
%v1: vector<5xf32>)
-> (vector<5xf32>, vector<5xf32>) {
// Write + read to/from tensor.
// CHECK: vector.transfer_write
@@ -1711,8 +1714,8 @@ func.func @equivalent_func_arg_2(%c0: index, %c10: index, %c1: index, %t0: tenso
// CHECK-LABEL: func @write_after_select_read_one
// CHECK-SAME: %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func.func @write_after_select_read_one(
%t1 : tensor<?xf32> {linalg.inplaceable = true},
%t2 : tensor<?xf32> {linalg.inplaceable = true},
%t1 : tensor<?xf32> {bufferization.writable = true},
%t2 : tensor<?xf32> {bufferization.writable = true},
%c : i1)
-> (f32, tensor<?xf32>)
{
@@ -1737,8 +1740,8 @@ func.func @write_after_select_read_one(
// CHECK-LABEL: func @write_after_select_read_both
// CHECK-SAME: %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func.func @write_after_select_read_both(
%t1 : tensor<?xf32> {linalg.inplaceable = true},
%t2 : tensor<?xf32> {linalg.inplaceable = true},
%t1 : tensor<?xf32> {bufferization.writable = true},
%t2 : tensor<?xf32> {bufferization.writable = true},
%c : i1)
-> (f32, f32, tensor<?xf32>)
{
@@ -1766,8 +1769,8 @@ func.func @write_after_select_read_both(
// CHECK-LABEL: func @write_after_select_no_conflict
// CHECK-SAME: %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func.func @write_after_select_no_conflict(
%t1 : tensor<?xf32> {linalg.inplaceable = true},
%t2 : tensor<?xf32> {linalg.inplaceable = true},
%t1 : tensor<?xf32> {bufferization.writable = true},
%t2 : tensor<?xf32> {bufferization.writable = true},
%c : i1)
-> (f32, tensor<?xf32>)
{

View File

@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize -split-input-file -verify-diagnostics
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="bufferize-function-boundaries=1" -split-input-file -verify-diagnostics
func.func private @foo() -> tensor<?xf32>
@@ -37,7 +37,7 @@ func.func @swappy(%cond1 : i1, %cond2 : i1, %t1 : tensor<f32>, %t2 : tensor<f32>
// -----
func.func @scf_if_not_equivalent(
%cond: i1, %t1: tensor<?xf32> {linalg.inplaceable = true},
%cond: i1, %t1: tensor<?xf32> {bufferization.writable = true},
%idx: index) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
scf.yield %t1 : tensor<?xf32>
@@ -54,7 +54,7 @@ func.func @scf_if_not_equivalent(
// -----
func.func @scf_if_not_aliasing(
%cond: i1, %t1: tensor<?xf32> {linalg.inplaceable = true},
%cond: i1, %t1: tensor<?xf32> {bufferization.writable = true},
%idx: index) -> f32 {
%r = scf.if %cond -> (tensor<?xf32>) {
scf.yield %t1 : tensor<?xf32>
@@ -85,7 +85,7 @@ func.func @bar() {
// -----
func.func @scf_for(%A : tensor<?xf32>,
%B : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32> {bufferization.writable = true},
%C : tensor<4xf32>,
%lb : index, %ub : index, %step : index)
-> (f32, f32)
@@ -110,14 +110,14 @@ func.func @scf_for(%A : tensor<?xf32>,
// -----
func.func private @fun_with_side_effects(%A: tensor<?xf32> {linalg.inplaceable = true})
func.func private @fun_with_side_effects(%A: tensor<?xf32> {bufferization.writable = true})
func.func @foo(%A: tensor<?xf32> {linalg.inplaceable = true}) -> (tensor<?xf32>) {
func.func @foo(%A: tensor<?xf32> {bufferization.writable = true}) -> (tensor<?xf32>) {
call @fun_with_side_effects(%A) : (tensor<?xf32>) -> ()
return %A: tensor<?xf32>
}
func.func @scf_yield_needs_copy(%A : tensor<?xf32> {linalg.inplaceable = true}, %iters : index) {
func.func @scf_yield_needs_copy(%A : tensor<?xf32> {bufferization.writable = true}, %iters : index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%res = scf.for %arg0 = %c0 to %iters step %c1 iter_args(%bbarg = %A) -> (tensor<?xf32>) {
@@ -131,7 +131,7 @@ func.func @scf_yield_needs_copy(%A : tensor<?xf32> {linalg.inplaceable = true},
// -----
func.func @extract_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = true})
func.func @extract_slice_fun(%A : tensor<?xf32> {bufferization.writable = true})
-> tensor<4xf32>
{
// This bufferizes to a pattern that the cross-function boundary pass needs to
@@ -184,6 +184,7 @@ func.func @mini_test_case1() -> tensor<10x20xf32> {
func.func @main() -> tensor<4xi32> {
%r = scf.execute_region -> tensor<4xi32> {
%A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
// expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}}
scf.yield %A: tensor<4xi32>
}
@@ -194,7 +195,7 @@ func.func @main() -> tensor<4xi32> {
// -----
func.func @to_memref_op_is_writing(
%t1: tensor<?xf32> {linalg.inplaceable = true}, %idx1: index,
%t1: tensor<?xf32> {bufferization.writable = true}, %idx1: index,
%idx2: index, %idx3: index, %v1: vector<5xf32>) -> (vector<5xf32>, vector<5xf32>) {
// This is a RaW conflict because to_memref is an inplace write and %t1 is
// read further down. This will likely have to change with partial

View File

@@ -1,12 +1,12 @@
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize -split-input-file | FileCheck %s
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1" -split-input-file | FileCheck %s
// Run fuzzer with different seeds.
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs fully-dynamic-layout-maps=0" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs fully-dynamic-layout-maps=0" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP-LABEL
// Bufferization of bodiless function with no tensor return value.
@@ -38,7 +38,7 @@ func.func private @private_func(tensor<?xf32>) -> (f32)
// CHECK-NOT: alloc
// CHECK-NOT: copy
// CHECK: call @private_func(%[[t]])
func.func @main(%t: tensor<?xf32> {linalg.inplaceable = true}) -> (f32) {
func.func @main(%t: tensor<?xf32> {bufferization.writable = true}) -> (f32) {
%0 = call @private_func(%t) : (tensor<?xf32>) -> (f32)
return %0 : f32
}
@@ -57,7 +57,7 @@ func.func private @private_func(tensor<?xf32>) -> (f32)
// CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
// CHECK: call @private_func(%[[casted]])
// CHECK: memref.dealloc %[[alloc]]
func.func @main(%t: tensor<?xf32> {linalg.inplaceable = false}) -> (f32) {
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
%0 = call @private_func(%t) : (tensor<?xf32>) -> (f32)
return %0 : f32
}
@@ -106,7 +106,7 @@ func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
// CHECK-LABEL: func @call_func_with_non_tensor_return(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @call_func_with_non_tensor_return(
%t0: tensor<?xf32> {linalg.inplaceable = true}) -> (f32, tensor<?xf32>) {
%t0: tensor<?xf32> {bufferization.writable = true}) -> (f32, tensor<?xf32>) {
// CHECK-NOT: alloc
// CHECK-NOT: copy
// CHECK: %[[call:.*]] = call @inner_func(%[[arg0]])
@@ -138,7 +138,7 @@ func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
// CHECK-LABEL: func @call_func_with_non_tensor_return(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @call_func_with_non_tensor_return(
%t0: tensor<?xf32> {linalg.inplaceable = false}) -> (f32, tensor<?xf32>) {
%t0: tensor<?xf32> {bufferization.writable = false}) -> (f32, tensor<?xf32>) {
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK-DAG: memref.copy %[[arg0]], %[[alloc]]
// CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
@@ -184,7 +184,7 @@ func.func @f2(%t: tensor<?xf32>) -> (f32) {
// CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
// CHECK: call @f2(%[[casted]])
// CHECK: memref.dealloc %[[alloc]]
func.func @main(%t: tensor<?xf32> {linalg.inplaceable = false}) -> (f32) {
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
%0 = call @f2(%t) : (tensor<?xf32>) -> (f32)
return %0 : f32
}
@@ -211,7 +211,7 @@ func.func @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> {
// CHECK: call @does_not_read(%[[casted]])
// CHECK: %[[r:.*]] = memref.load %[[alloc]]
// CHECK: memref.dealloc %[[alloc]]
func.func @main(%t: tensor<?xf32> {linalg.inplaceable = false}) -> f32 {
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> f32 {
%0 = call @does_not_read(%t) : (tensor<?xf32>) -> (tensor<?xf32>)
%idx = arith.constant 4 : index
%r = tensor.extract %0[%idx] : tensor<?xf32>
@@ -344,9 +344,9 @@ func.func @scf_for_with_tensor_insert_slice(
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$DYN_1D_MAP]]>
func.func @bar(
%A : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32> {linalg.inplaceable = true},
%C : tensor<4xf32> {linalg.inplaceable = true},
%A : tensor<?xf32> {bufferization.writable = true},
%B : tensor<?xf32> {bufferization.writable = true},
%C : tensor<4xf32> {bufferization.writable = true},
%lb : index, %ub : index, %step : index)
-> (tensor<?xf32>, tensor<?xf32>)
{
@@ -447,9 +447,10 @@ func.func private @external_func(tensor<?xf32>)
// CHECK-SAME: %[[A:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[B:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
// CHECK-SAME: %[[C:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
func.func @callee(%A : tensor<?xf32> {linalg.buffer_layout = affine_map<(i)[s0, s1] -> (i)>},
%B : tensor<?xf32>,
%C : tensor<?xf32>) {
func.func @callee(
%A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>},
%B : tensor<?xf32>,
%C : tensor<?xf32>) {
// CHECK-NEXT: %[[CASTED:.*]] = memref.cast %[[A]] : memref<?xf32> to memref<?xf32, #[[$DYNAMIC]]>
// CHECK-NEXT: call @external_func(%[[CASTED]]) : (memref<?xf32, #[[$DYNAMIC]]>) -> ()
call @external_func(%A) : (tensor<?xf32>) -> ()
@@ -467,9 +468,9 @@ func.func @callee(%A : tensor<?xf32> {linalg.buffer_layout = affine_map<(i)[s0,
// CHECK-SAME: %[[A:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[B:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[C:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
func.func @entry(%A : tensor<?xf32> {linalg.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, linalg.inplaceable = false},
%B : tensor<?xf32> {linalg.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, linalg.inplaceable = false},
%C : tensor<?xf32> {linalg.inplaceable = false}) {
func.func @entry(%A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, bufferization.writable = false},
%B : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, bufferization.writable = false},
%C : tensor<?xf32> {bufferization.writable = false}) {
// Note: `callee` does not write to its bbArg directly, but `external_func`
// does. Inside `callee`, the writes via `external_func` do not cause a
// conflict. However, inside `entry`, the writes do cause a conflict because
@@ -505,8 +506,8 @@ func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
// CHECK-LABEL: func @equivalent_func_arg(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @equivalent_func_arg(%t0: tensor<?xf32> {linalg.inplaceable = true},
%c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
func.func @equivalent_func_arg(%t0: tensor<?xf32> {bufferization.writable = true},
%c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
// CHECK-NOT: alloc
// CHECK-NOT: copy
%1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
@@ -534,8 +535,8 @@ func.func @inner_func_2(%t: tensor<?xf32>) -> tensor<?xf32> {
// CHECK-LABEL: func @equivalent_func_arg_2(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @equivalent_func_arg_2(%t0: tensor<?xf32> {linalg.inplaceable = true},
%c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
func.func @equivalent_func_arg_2(%t0: tensor<?xf32> {bufferization.writable = true},
%c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
// CHECK: scf.for {{.*}} {
%1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
// CHECK: %[[alloc:.*]] = memref.alloc
@@ -549,3 +550,23 @@ func.func @equivalent_func_arg_2(%t0: tensor<?xf32> {linalg.inplaceable = true},
}
return %1: tensor<?xf32>
}
// -----
// Bufferize without fully dynamic layout maps.
// CHECK-LABEL: func @transfer_read(%{{.*}}: memref<?xf32, #map>) -> vector<4xf32> {
// CHECK-NO-LAYOUT-MAP-LABEL: func @transfer_read(%{{.*}}: memref<?xf32>) -> vector<4xf32>
func.func @transfer_read(
%A : tensor<?xf32> {bufferization.writable = false})
-> (vector<4xf32>)
{
%c0 = arith.constant 0 : index
%f0 = arith.constant 0.0 : f32
// CHECK: %[[RES:.*]] = vector.transfer_read {{.*}} : memref<?xf32, #{{.*}}>, vector<4xf32>
%0 = vector.transfer_read %A[%c0], %f0 : tensor<?xf32>, vector<4xf32>
// CHECK: return %[[RES]] : vector<4xf32>
return %0 : vector<4xf32>
}

View File

@@ -7,9 +7,9 @@
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1234(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -34,9 +34,9 @@ func.func @fill_extract_matmul_1234(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1243(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -60,9 +60,10 @@ func.func @fill_extract_matmul_1243(
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1324(%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
func.func @fill_extract_matmul_1324(
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -86,9 +87,10 @@ func.func @fill_extract_matmul_1324(%arg0: tensor<518x518xf32> {linalg.buffer_la
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1342(%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
func.func @fill_extract_matmul_1342(
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -112,9 +114,10 @@ func.func @fill_extract_matmul_1342(%arg0: tensor<518x518xf32> {linalg.buffer_la
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1423(%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
func.func @fill_extract_matmul_1423(
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -138,9 +141,10 @@ func.func @fill_extract_matmul_1423(%arg0: tensor<518x518xf32> {linalg.buffer_la
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1432(%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
func.func @fill_extract_matmul_1432(
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -165,9 +169,9 @@ func.func @fill_extract_matmul_1432(%arg0: tensor<518x518xf32> {linalg.buffer_la
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2134(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -192,9 +196,9 @@ func.func @fill_extract_matmul_2134(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2143(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -219,9 +223,9 @@ func.func @fill_extract_matmul_2143(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2314(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -246,9 +250,9 @@ func.func @fill_extract_matmul_2314(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2341(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -273,9 +277,9 @@ func.func @fill_extract_matmul_2341(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2413(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -300,9 +304,9 @@ func.func @fill_extract_matmul_2413(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2431(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -327,9 +331,9 @@ func.func @fill_extract_matmul_2431(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3124(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -354,9 +358,9 @@ func.func @fill_extract_matmul_3124(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3142(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -381,10 +385,9 @@ func.func @fill_extract_matmul_3142(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3214(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true}) -> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
@@ -408,9 +411,9 @@ func.func @fill_extract_matmul_3214(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3241(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -435,9 +438,9 @@ func.func @fill_extract_matmul_3241(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3412(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -462,9 +465,9 @@ func.func @fill_extract_matmul_3412(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3421(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -489,9 +492,9 @@ func.func @fill_extract_matmul_3421(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_4123(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -516,9 +519,9 @@ func.func @fill_extract_matmul_4123(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_4132(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -543,9 +546,9 @@ func.func @fill_extract_matmul_4132(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_4213(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -570,9 +573,9 @@ func.func @fill_extract_matmul_4213(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_4231(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -597,9 +600,9 @@ func.func @fill_extract_matmul_4231(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_4312(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -624,9 +627,9 @@ func.func @fill_extract_matmul_4312(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_4321(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index


@@ -3,9 +3,9 @@
// CHECK-LABEL: func @linalg_op_bufferizes_inplace_with_input
// CHECK-SAME: %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
func.func @linalg_op_bufferizes_inplace_with_input(
%t1: tensor<?x?xf32> {linalg.inplaceable = true},
%t2: tensor<?xf32> {linalg.inplaceable = false},
%t3: tensor<?x?xf32> {linalg.inplaceable = false},
%t1: tensor<?x?xf32> {bufferization.writable = true},
%t2: tensor<?xf32> {bufferization.writable = false},
%t3: tensor<?x?xf32> {bufferization.writable = false},
%s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
// CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t1]] : {{.*}})
%r = linalg.generic {
@@ -27,9 +27,9 @@ func.func @linalg_op_bufferizes_inplace_with_input(
// CHECK-LABEL: func @linalg_op_bufferizes_out_of_place_with_input
// CHECK-SAME: %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
func.func @linalg_op_bufferizes_out_of_place_with_input(
%t1: tensor<?x?xf32> {linalg.inplaceable = false},
%t2: tensor<?xf32> {linalg.inplaceable = false},
%t3: tensor<?x?xf32> {linalg.inplaceable = false},
%t1: tensor<?x?xf32> {bufferization.writable = false},
%t2: tensor<?xf32> {bufferization.writable = false},
%t3: tensor<?x?xf32> {bufferization.writable = false},
%s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK: memref.copy %[[t1]], %[[alloc]]
@@ -54,9 +54,9 @@ func.func @linalg_op_bufferizes_out_of_place_with_input(
// CHECK-LABEL: func @linalg_op_output_cannot_alias_with_input
// CHECK-SAME: %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
func.func @linalg_op_output_cannot_alias_with_input(
%t1: tensor<?x?xf32> {linalg.inplaceable = true},
%t2: tensor<?xf32> {linalg.inplaceable = false},
%t3: tensor<?x?xf32> {linalg.inplaceable = true},
%t1: tensor<?x?xf32> {bufferization.writable = true},
%t2: tensor<?xf32> {bufferization.writable = false},
%t3: tensor<?x?xf32> {bufferization.writable = true},
%s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
// CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t3]] : {{.*}})
%r = linalg.generic {


@@ -16,9 +16,9 @@
// CHECK-LABEL: func @linalg_op_same_out_tensors(
func.func @linalg_op_same_out_tensors(
%t1: tensor<?xf32> {linalg.inplaceable = true},
%t1: tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read-write"
%t2: tensor<?xf32> {linalg.inplaceable = true})
%t2: tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "write"
-> (tensor<?xf32>, tensor<?xf32>){
@@ -54,9 +54,9 @@ func.func @linalg_op_same_out_tensors(
// CHECK-LABEL: func @linalg_op_same_out_tensors_2(
func.func @linalg_op_same_out_tensors_2(
%t1: tensor<?xf32> {linalg.inplaceable = true},
%t1: tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read-write"
%t2: tensor<?xf32> {linalg.inplaceable = true})
%t2: tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "write"
-> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>){


@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
// CHECK-LABEL: func @buffer_forwarding_conflict
func.func @buffer_forwarding_conflict(%arg0: tensor<?xf32> {linalg.inplaceable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
func.func @buffer_forwarding_conflict(%arg0: tensor<?xf32> {bufferization.writable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
%cst = arith.constant 0.000000e+00 : f32
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"]
@@ -34,7 +34,7 @@ func.func @buffer_forwarding_conflict(%arg0: tensor<?xf32> {linalg.inplaceable =
// -----
// CHECK-LABEL: func @buffer_forwarding_no_conflict
func.func @buffer_forwarding_no_conflict(%arg0: tensor<?xf32> {linalg.inplaceable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
func.func @buffer_forwarding_no_conflict(%arg0: tensor<?xf32> {bufferization.writable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
%cst = arith.constant 0.000000e+00 : f32
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]


@@ -6,7 +6,7 @@
// CHECK-SAME: %[[FUNC_ARG:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[sz:[0-9a-zA-Z]*]]: index
func.func @buffer_forwarding_conflict(
%t: tensor<?xf32> {linalg.buffer_layout = affine_map<(d0) -> (d0)>, linalg.inplaceable = true},
%t: tensor<?xf32> {bufferization.buffer_layout = affine_map<(d0) -> (d0)>, bufferization.writable = true},
%sz: index)
-> (tensor<?xf32>, tensor<?xf32>)
{
@@ -43,7 +43,7 @@ func.func @buffer_forwarding_conflict(
// CHECK-SAME: %[[FUNC_ARG:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[sz:[0-9a-zA-Z]*]]: index
func.func @buffer_forwarding_no_conflict(
%t: tensor<?xf32> {linalg.buffer_layout = affine_map<(d0) -> (d0)>, linalg.inplaceable = true},
%t: tensor<?xf32> {bufferization.buffer_layout = affine_map<(d0) -> (d0)>, bufferization.writable = true},
%sz: index)
-> (tensor<?xf32>)
{


@@ -8,31 +8,13 @@
// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs fully-dynamic-layout-maps=0" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP
// CHECK-LABEL: func @transfer_read(%{{.*}}: memref<?xf32, #map>) -> vector<4xf32> {
// CHECK-NO-LAYOUT-MAP-LABEL: func @transfer_read(%{{.*}}: memref<?xf32>) -> vector<4xf32>
func.func @transfer_read(
%A : tensor<?xf32> {linalg.inplaceable = false})
-> (vector<4xf32>)
{
%c0 = arith.constant 0 : index
%f0 = arith.constant 0.0 : f32
// CHECK: %[[RES:.*]] = vector.transfer_read {{.*}} : memref<?xf32, #{{.*}}>, vector<4xf32>
%0 = vector.transfer_read %A[%c0], %f0 : tensor<?xf32>, vector<4xf32>
// CHECK: return %[[RES]] : vector<4xf32>
return %0 : vector<4xf32>
}
// -----
// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
// CHECK-LABEL: func @fill_inplace(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
// CHECK-NO-LAYOUT-MAP-LABEL: func @fill_inplace(%{{.*}}: memref<?xf32>) {
func.func @fill_inplace(
%A : tensor<?xf32> {linalg.inplaceable = true})
%A : tensor<?xf32> {bufferization.writable = true})
-> tensor<?xf32>
{
// CHECK: %[[F0:.*]] = arith.constant 0.000000e+00 : f32
@@ -51,7 +33,7 @@ func.func @fill_inplace(
// -----
// CHECK-LABEL: func @tensor_extract(%{{.*}}: memref<?xf32, #{{.*}}>) -> f32 {
func.func @tensor_extract(%A : tensor<?xf32> {linalg.inplaceable = false}) -> (f32) {
func.func @tensor_extract(%A : tensor<?xf32> {bufferization.writable = false}) -> (f32) {
%c0 = arith.constant 0 : index
// CHECK: %[[RES:.*]] = memref.load {{.*}} : memref<?xf32, #{{.*}}>
@@ -65,12 +47,12 @@ func.func @tensor_extract(%A : tensor<?xf32> {linalg.inplaceable = false}) -> (f
// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
/// No linalg.inplaceable flag, must allocate.
/// No bufferization.writable flag, must allocate.
// CHECK-LABEL: func @not_inplace(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>) -> memref<?xf32> {
// CHECK-NO-LAYOUT-MAP-LABEL: func @not_inplace(%{{.*}}: memref<?xf32>) -> memref<?xf32>
func.func @not_inplace(
%A : tensor<?xf32> {linalg.inplaceable = false})
%A : tensor<?xf32> {bufferization.writable = false})
-> tensor<?xf32>
{
// CHECK: %[[F0:.*]] = arith.constant 0.000000e+00 : f32
@@ -94,7 +76,7 @@ func.func @not_inplace(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?x?xf32, #[[$map_2d_dyn]]>) {
// CHECK-NO-LAYOUT-MAP-LABEL: func @not_inplace(%{{.*}}: memref<?x?xf32>) {
func.func @not_inplace(
%A : tensor<?x?xf32> {linalg.inplaceable = true})
%A : tensor<?x?xf32> {bufferization.writable = true})
-> tensor<?x?xf32>
{
%f0 = arith.constant 0.0 : f32
@@ -120,7 +102,8 @@ func.func @not_inplace(
// -----
// CHECK-LABEL: func @not_inplace
func.func @not_inplace(%A : tensor<?x?xf32> {linalg.inplaceable = true}) -> tensor<?x?xf32> {
func.func @not_inplace(
%A : tensor<?x?xf32> {bufferization.writable = true}) -> tensor<?x?xf32> {
/// Within op multiple uses of %A, must alloc.
// CHECK: alloc
%r = linalg.matmul ins(%A, %A: tensor<?x?xf32>, tensor<?x?xf32>)
@@ -132,8 +115,9 @@ func.func @not_inplace(%A : tensor<?x?xf32> {linalg.inplaceable = true}) -> tens
// -----
// CHECK-LABEL: func @vec_inplace
func.func @vec_inplace(%A : tensor<?xf32> {linalg.inplaceable = true}, %vec : vector<4xf32>)
-> tensor<?xf32>
func.func @vec_inplace(
%A : tensor<?xf32> {bufferization.writable = true}, %vec : vector<4xf32>)
-> tensor<?xf32>
{
%c0 = arith.constant 0 : index
@@ -151,8 +135,9 @@ func.func @vec_inplace(%A : tensor<?xf32> {linalg.inplaceable = true}, %vec : ve
// CHECK-LABEL: func @vec_not_inplace
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
func.func @vec_not_inplace(%A : tensor<?xf32> {linalg.inplaceable = true}, %vec : vector<4xf32>)
-> (tensor<?xf32>, tensor<?xf32>)
func.func @vec_not_inplace(
%A : tensor<?xf32> {bufferization.writable = true}, %vec : vector<4xf32>)
-> (tensor<?xf32>, tensor<?xf32>)
{
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -182,10 +167,11 @@ func.func @vec_not_inplace(%A : tensor<?xf32> {linalg.inplaceable = true}, %vec
// CHECK-SAME: %[[A1:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>,
// CHECK-SAME: %[[t0:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>,
// CHECK-SAME: %[[t1:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
func.func @insert_slice_fun(%A0 : tensor<?xf32> {linalg.inplaceable = false},
%A1 : tensor<?xf32> {linalg.inplaceable = true},
%t0 : tensor<4xf32> {linalg.inplaceable = false},
%t1 : tensor<4xf32> {linalg.inplaceable = true})
func.func @insert_slice_fun(
%A0 : tensor<?xf32> {bufferization.writable = false},
%A1 : tensor<?xf32> {bufferization.writable = true},
%t0 : tensor<4xf32> {bufferization.writable = false},
%t1 : tensor<4xf32> {bufferization.writable = true})
-> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
{
// Hoisted allocs.
@@ -230,8 +216,8 @@ func.func @insert_slice_fun(%A0 : tensor<?xf32> {linalg.inplaceable = false},
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
func.func @insert_slice_fun(
%A : tensor<?xf32> {linalg.inplaceable = true},
%t : tensor<4xf32> {linalg.inplaceable = false})
%A : tensor<?xf32> {bufferization.writable = true},
%t : tensor<4xf32> {bufferization.writable = false})
-> tensor<?xf32>
{
%f0 = arith.constant 0.0 : f32
@@ -258,8 +244,8 @@ func.func @insert_slice_fun(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
func.func @insert_slice_fun(
%A : tensor<?xf32> {linalg.inplaceable = true},
%t : tensor<4xf32> {linalg.inplaceable = false})
%A : tensor<?xf32> {bufferization.writable = true},
%t : tensor<4xf32> {bufferization.writable = false})
-> tensor<?xf32>
{
%f0 = arith.constant 0.0 : f32
@@ -286,8 +272,8 @@ func.func @insert_slice_fun(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
func.func @insert_slice_fun_not_inplace(
%A : tensor<?xf32> {linalg.inplaceable = false},
%t : tensor<4xf32> {linalg.inplaceable = false})
%A : tensor<?xf32> {bufferization.writable = false},
%t : tensor<4xf32> {bufferization.writable = false})
-> tensor<?xf32>
{
// CHECK: %[[ALLOC:.*]] = memref.alloc(%{{.*}}) {alignment = 128 : i64} : memref<?xf32>
@@ -312,9 +298,10 @@ func.func @insert_slice_fun_not_inplace(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>,
// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
// CHECK-SAME: ) -> memref<?xf32> {
func.func @scf_for_yield_only(%A : tensor<?xf32> {linalg.inplaceable = false},
%B : tensor<?xf32> {linalg.inplaceable = true},
%lb : index, %ub : index, %step : index)
func.func @scf_for_yield_only(
%A : tensor<?xf32> {bufferization.writable = false},
%B : tensor<?xf32> {bufferization.writable = true},
%lb : index, %ub : index, %step : index)
-> (tensor<?xf32>, tensor<?xf32>)
{
// CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc
@@ -342,8 +329,8 @@ func.func @scf_for_yield_only(%A : tensor<?xf32> {linalg.inplaceable = false},
// just want to make sure that it does not crash.
// CHECK-LABEL: func @nested_scf_for
func.func @nested_scf_for(%A : tensor<?xf32> {linalg.inplaceable = true},
%v : vector<5xf32>) -> tensor<?xf32> {
func.func @nested_scf_for(%A : tensor<?xf32> {bufferization.writable = true},
%v : vector<5xf32>) -> tensor<?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
@@ -366,10 +353,10 @@ func.func @nested_scf_for(%A : tensor<?xf32> {linalg.inplaceable = true},
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
func.func @scf_for_with_tensor.insert_slice(
%A : tensor<?xf32> {linalg.inplaceable = false},
%B : tensor<?xf32> {linalg.inplaceable = true},
%C : tensor<4xf32> {linalg.inplaceable = false},
%lb : index, %ub : index, %step : index)
%A : tensor<?xf32> {bufferization.writable = false},
%B : tensor<?xf32> {bufferization.writable = true},
%C : tensor<4xf32> {bufferization.writable = false},
%lb : index, %ub : index, %step : index)
-> (tensor<?xf32>, tensor<?xf32>)
{
// CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc
@@ -407,8 +394,9 @@ func.func @scf_for_with_tensor.insert_slice(
// CHECK-LABEL: func @execute_region_with_conflict(
// CHECK-SAME: %[[m1:.*]]: memref<?xf32
func.func @execute_region_with_conflict(%t1 : tensor<?xf32> {linalg.inplaceable = "true"})
-> (f32, tensor<?xf32>, f32)
func.func @execute_region_with_conflict(
%t1 : tensor<?xf32> {bufferization.writable = "true"})
-> (f32, tensor<?xf32>, f32)
{
%f1 = arith.constant 0.0 : f32
%idx = arith.constant 7 : index
@@ -439,10 +427,10 @@ func.func @execute_region_with_conflict(%t1 : tensor<?xf32> {linalg.inplaceable
// CHECK-SAME: %[[B:[0-9a-zA-Z]*]]: memref<256x192xf32>
// CHECK-SAME: %[[C:[0-9a-zA-Z]*]]: memref<128x192xf32>
func.func @matmul(
%A: tensor<128x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%B: tensor<256x192xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%C: tensor<128x192xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<128x192xf32> {
%A: tensor<128x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
%B: tensor<256x192xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
%C: tensor<128x192xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
-> tensor<128x192xf32> {
%c0 = arith.constant 0 : index
%c256 = arith.constant 256 : index
%c32 = arith.constant 32 : index
@@ -513,8 +501,8 @@ func.func @matmul(
// CHECK: %[[subview:.*]] = memref.subview %[[A]][{{.*}}] [4] [1] : {{.*}} to memref<4xf32
// CHECK: memref.copy %[[alloc]], %[[subview]]
func.func @tensor_cast_not_in_place(
%A : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32> {linalg.inplaceable = false}, %idx: index)
%A : tensor<?xf32> {bufferization.writable = true},
%B : tensor<?xf32> {bufferization.writable = false}, %idx: index)
-> (tensor<?xf32>)
{
%r0 = tensor.cast %A : tensor<?xf32> to tensor<4xf32>
@@ -533,7 +521,7 @@ func.func @tensor_cast_not_in_place(
// CHECK-LABEL: func @dominance_violation_bug_1
func.func @dominance_violation_bug_1(
%A : tensor<?x?xf32> {linalg.inplaceable = false},
%A : tensor<?x?xf32> {bufferization.writable = false},
%idx : index)
-> tensor<?x?xf32>
{
@@ -553,8 +541,8 @@ func.func @dominance_violation_bug_1(
// CHECK-LABEL: func @scf_if_inplace(
// CHECK-SAME: %[[cond:.*]]: i1, %[[t1:.*]]: memref<?xf32{{.*}}>, %[[v:.*]]: vector
func.func @scf_if_inplace(%cond: i1,
%t1: tensor<?xf32> {linalg.inplaceable = true},
%v: vector<5xf32>, %idx: index) -> tensor<?xf32> {
%t1: tensor<?xf32> {bufferization.writable = true},
%v: vector<5xf32>, %idx: index) -> tensor<?xf32> {
// CHECK: scf.if %[[cond]] {
// CHECK-NEXT: } else {
@@ -582,9 +570,12 @@ func.func @scf_if_inplace(%cond: i1,
// CHECK: vector.transfer_write
// CHECK: }
// CHECK: }
func.func @scf_if_inside_scf_for(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1) -> tensor<?xf32> {
func.func @scf_if_inside_scf_for(
%t1: tensor<?xf32> {bufferization.writable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1)
-> tensor<?xf32>
{
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
@@ -606,8 +597,8 @@ func.func @scf_if_inside_scf_for(%t1: tensor<?xf32> {linalg.inplaceable = true},
// CHECK-SAME: %[[cond:.*]]: i1, %[[A:.*]]: memref<{{.*}}>, %[[B:.*]]: memref<{{.*}}>) -> memref<{{.*}}>
func.func @scf_if_non_equiv_yields(
%b : i1,
%A : tensor<4xf32> {linalg.inplaceable = false},
%B : tensor<4xf32> {linalg.inplaceable = false})
%A : tensor<4xf32> {bufferization.writable = false},
%B : tensor<4xf32> {bufferization.writable = false})
-> tensor<4xf32>
{
// CHECK: %[[r:.*]] = arith.select %[[cond]], %[[A]], %[[B]]
@@ -624,8 +615,8 @@ func.func @scf_if_non_equiv_yields(
// CHECK-LABEL: func @insert_op
// CHECK-SAME: %[[t1:.*]]: memref<?xf32, {{.*}}>, %[[s:.*]]: f32, %[[i:.*]]: index
func.func @insert_op(%t1 : tensor<?xf32> {linalg.inplaceable = true},
%s : f32, %i : index) -> tensor<?xf32> {
func.func @insert_op(%t1 : tensor<?xf32> {bufferization.writable = true},
%s : f32, %i : index) -> tensor<?xf32> {
// CHECK: memref.store %[[s]], %[[t1]][%[[i]]]
%0 = tensor.insert %s into %t1[%i] : tensor<?xf32>
// CHECK: return
@@ -635,9 +626,11 @@ func.func @insert_op(%t1 : tensor<?xf32> {linalg.inplaceable = true},
// -----
func.func @gather_like(
%arg0 : tensor<?x?xf32> {linalg.inplaceable = false},
%arg1 : tensor<?xi32> {linalg.inplaceable = false},
%arg2 : tensor<?x?xf32> {linalg.inplaceable = true}) -> tensor<?x?xf32> {
%arg0 : tensor<?x?xf32> {bufferization.writable = false},
%arg1 : tensor<?xi32> {bufferization.writable = false},
%arg2 : tensor<?x?xf32> {bufferization.writable = true})
-> tensor<?x?xf32>
{
%0 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0)>,
affine_map<(d0, d1) -> (d0, d1)>],
@@ -667,10 +660,12 @@ func.func @gather_like(
// CHECK-LABEL: func @linalg_op_bufferizes_inplace_with_input
// CHECK-SAME: %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
func.func @linalg_op_bufferizes_inplace_with_input(
%t1: tensor<?x?xf32> {linalg.inplaceable = true},
%t2: tensor<?xf32> {linalg.inplaceable = true},
%t3: tensor<?x?xf32> {linalg.inplaceable = true},
%s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
%t1: tensor<?x?xf32> {bufferization.writable = true},
%t2: tensor<?xf32> {bufferization.writable = true},
%t3: tensor<?x?xf32> {bufferization.writable = true},
%s1: index, %s2: index, %cst: f32)
-> tensor<?x?xf32>
{
// CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t3]] : {{.*}})
%r = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
@@ -699,7 +694,7 @@ func.func @linalg_op_bufferizes_inplace_with_input(
// CHECK-LABEL: func @op_is_reading_but_following_ops_are_not
// CHECK-SAME: %[[t0:.*]]: memref<?xf32
func.func @op_is_reading_but_following_ops_are_not(
%t0 : tensor<?xf32> {linalg.inplaceable = false},
%t0 : tensor<?xf32> {bufferization.writable = false},
%cst : f32)
-> tensor<?xf32>
{
@@ -751,8 +746,8 @@ func.func @depthwise_conv_1d_nwc_wc(%arg0: index, %arg1: index, %arg2: tensor<8x
// CHECK-LABEL: func @write_to_select_op_source
// CHECK-SAME: %[[t1:.*]]: memref<?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>
func.func @write_to_select_op_source(
%t1 : tensor<?xf32> {linalg.inplaceable = true},
%t2 : tensor<?xf32> {linalg.inplaceable = true},
%t1 : tensor<?xf32> {bufferization.writable = true},
%t2 : tensor<?xf32> {bufferization.writable = true},
%c : i1)
-> (tensor<?xf32>, tensor<?xf32>)
{
@@ -773,8 +768,8 @@ func.func @write_to_select_op_source(
// CHECK-LABEL: func @write_after_select_read_one
// CHECK-SAME: %[[t1:.*]]: memref<?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>
func.func @write_after_select_read_one(
%t1 : tensor<?xf32> {linalg.inplaceable = true},
%t2 : tensor<?xf32> {linalg.inplaceable = true},
%t1 : tensor<?xf32> {bufferization.writable = true},
%t2 : tensor<?xf32> {bufferization.writable = true},
%c : i1)
-> (f32, tensor<?xf32>)
{
@@ -910,9 +905,8 @@ func.func @scf_for_yield_allocation(%t: tensor<?xf32>, %lb : index, %ub : index,
// CHECK-LABEL: func @scf_for_swapping_yields(
// CHECK-SAME: %[[A:.*]]: memref<?xf32, #{{.*}}>, %[[B:.*]]: memref<?xf32, #{{.*}}>
func.func @scf_for_swapping_yields(
%A : tensor<?xf32>, %B : tensor<?xf32> {linalg.inplaceable = true},
%A : tensor<?xf32>, %B : tensor<?xf32> {bufferization.writable = true},
%C : tensor<4xf32>, %lb : index, %ub : index, %step : index)
-> (f32, f32)
{


@@ -7259,7 +7259,6 @@ cc_library(
":LinalgStructuredOpsIncGen",
":MathDialect",
":MemRefDialect",
":ModuleBufferization",
":Pass",
":SCFDialect",
":SCFTransforms",
@@ -7281,25 +7280,6 @@ cc_library(
],
)
cc_library(
name = "ModuleBufferization",
srcs = [
"lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp",
],
hdrs = [
"include/mlir/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.h",
],
includes = ["include"],
deps = [
":BufferizationDialect",
":BufferizationTransforms",
":FuncDialect",
":IR",
":MemRefDialect",
"//llvm:Support",
],
)
cc_library(
name = "TilingInterface",
srcs = ["lib/Interfaces/TilingInterface.cpp"],