[mlir][bufferization] Move ModuleBufferization to bufferization dialect

* Move Module Bufferization to the bufferization dialect. The implementation is split into `OneShotModuleBufferize.cpp` and `FuncBufferizableOpInterfaceImpl.cpp`, so that the external model implementation can be easily moved to the func dialect in the future.
* Split and clean up test cases. A few test cases are still remaining in Linalg and will be updated separately.
* `linalg.inplaceable` is renamed to `bufferization.writable` to accurately reflect its current usage.
* Attributes and their verifiers are moved from the Linalg dialect to the Bufferization dialect.
* Expand documentation.
* Add a new One-Shot Bufferize flag, `bufferize-function-boundaries`, that enables bufferization across function boundaries (see the sketch below).
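
As an editorial illustration (not part of this change's test suite; `@fill_inplace` and its body are made up), the renamed attribute and the new flag would be used roughly like this:

```
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1"
func.func @fill_inplace(%t: tensor<?xf32> {bufferization.writable = true},
                        %f: f32) -> tensor<?xf32> {
  // Bufferizes in-place into the buffer of %t because the argument is writable.
  %0 = linalg.fill ins(%f : f32) outs(%t : tensor<?xf32>) -> tensor<?xf32>
  return %0 : tensor<?xf32>
}
```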

Differential Revision: https://reviews.llvm.org/D122229
Author: Matthias Springer
Date: 2022-04-22 18:08:44 +09:00
parent 3e1d2c352c
commit e07a7fd5c0
33 changed files with 1620 additions and 1487 deletions

View File

@@ -326,17 +326,12 @@ def BufferizableOpInterface : OpInterface<"BufferizableOpInterface"> {
&& !bufferizableOp.getAliasingOpResult(opOperand, state).empty();
}
-  // TODO: The following two attributes should belong to the tensor dialect.
-  // The corresponding verifier should also be in the tensor dialect.
+  // TODO: This attribute is deprecated. Use `bufferization.writable` or add
+  // a new attribute in a different dialect.
/// Attribute name used to mark region arguments that can be bufferized
/// in-place during one-shot bufferization.
constexpr const static ::llvm::StringLiteral
kInplaceableAttrName = "linalg.inplaceable";
/// Attribute name used to mark the bufferization layout for region
/// arguments during one-shot bufferization.
constexpr const static ::llvm::StringLiteral
kBufferLayoutAttrName = "linalg.buffer_layout";
kInplaceableAttrName = "linalg.inplaceable";
}];
}

View File

@@ -26,6 +26,19 @@ def Bufferization_Dialect : Dialect {
deallocation](/docs/BufferDeallocationInternals/).
}];
let dependentDialects = ["memref::MemRefDialect", "tensor::TensorDialect"];
let extraClassDeclaration = [{
/// An attribute that can override writability of buffers of tensor function
/// arguments during One-Shot Module Bufferize.
constexpr const static ::llvm::StringLiteral
kWritableAttrName = "bufferization.writable";
/// Attribute name used to mark the bufferization layout for region
/// arguments during One-Shot Module Bufferize.
constexpr const static ::llvm::StringLiteral
kBufferLayoutAttrName = "bufferization.buffer_layout";
}];
let hasOperationAttrVerify = 1;
}
#endif // BUFFERIZATION_BASE
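
For orientation, a small editorial sketch of how the two attribute names declared above are attached to function arguments (the function and the layout map are illustrative):

```
func.func @example(
    %A: tensor<?xf32> {bufferization.writable = false},
    %B: tensor<16xf32> {bufferization.buffer_layout = affine_map<(d0) -> (d0)>}) {
  return
}
```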

View File

@@ -0,0 +1,76 @@
//===- BufferizableOpInterfaceImpl.h - Impl. of BufferizableOpInterface ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_BUFFERIZATION_TRANSFORMS_FUNCBUFFERIZABLEOPINTERFACEIMPL_H
#define MLIR_BUFFERIZATION_TRANSFORMS_FUNCBUFFERIZABLEOPINTERFACEIMPL_H
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
namespace mlir {
class DialectRegistry;
namespace func {
class FuncOp;
} // namespace func
namespace bufferization {
namespace func_ext {
/// The state of analysis of a FuncOp.
enum class FuncOpAnalysisState { NotAnalyzed, InProgress, Analyzed };
using func::FuncOp;
/// Extra analysis state that is required for bufferization of function
/// boundaries.
struct FuncAnalysisState : public DialectAnalysisState {
// Note: Function arguments and/or function return values may disappear during
// bufferization. Functions and their CallOps are analyzed and bufferized
// separately. To ensure that a CallOp analysis/bufferization can access an
// already bufferized function's analysis results, we store bbArg/return value
// indices instead of BlockArguments/OpOperand pointers.
/// A set of block argument indices.
using BbArgIndexSet = DenseSet<int64_t>;
/// A mapping of indices to indices.
using IndexMapping = DenseMap<int64_t, int64_t>;
/// A mapping of indices to a list of indices.
using IndexToIndexListMapping = DenseMap<int64_t, SmallVector<int64_t>>;
/// A mapping of ReturnOp OpOperand indices to equivalent FuncOp BBArg
/// indices.
DenseMap<FuncOp, IndexMapping> equivalentFuncArgs;
/// A mapping of ReturnOp OpOperand indices to aliasing FuncOp BBArg indices.
DenseMap<FuncOp, IndexToIndexListMapping> aliasingFuncArgs;
/// A mapping of FuncOp BBArg indices to aliasing ReturnOp OpOperand indices.
DenseMap<FuncOp, IndexToIndexListMapping> aliasingReturnVals;
/// A set of all read BlockArguments of FuncOps.
DenseMap<FuncOp, BbArgIndexSet> readBbArgs;
/// A set of all written-to BlockArguments of FuncOps.
DenseMap<FuncOp, BbArgIndexSet> writtenBbArgs;
/// Keep track of which FuncOps are fully analyzed or currently being
/// analyzed.
DenseMap<FuncOp, FuncOpAnalysisState> analyzedFuncOps;
/// This function is called right before analyzing the given FuncOp. It
/// initializes the data structures for the FuncOp in this state object.
void startFunctionAnalysis(FuncOp funcOp);
};
void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry);
} // namespace func_ext
} // namespace bufferization
} // namespace mlir
#endif // MLIR_BUFFERIZATION_TRANSFORMS_FUNCBUFFERIZABLEOPINTERFACEIMPL_H
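
As an editorial aside on why indices rather than pointers are stored: for a hypothetical function like `@f` below, the analysis would record roughly `equivalentFuncArgs[@f] = {1 -> 0}` (return operand #1 is equivalent to bbArg #0), and that record stays valid even after the equivalent return value is dropped during bufferization.

```
func.func @f(%t: tensor<?xf32>, %i: index) -> (index, tensor<?xf32>) {
  // Return operand #1 is the unmodified bbArg #0.
  return %i, %t : index, tensor<?xf32>
}
```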

View File

@@ -0,0 +1,31 @@
//===- OneShotModuleBufferize.h - Bufferization across Func. Boundaries ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_ONESHOTMODULEBUFFERIZE_H
#define MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_ONESHOTMODULEBUFFERIZE_H
namespace mlir {
struct LogicalResult;
class ModuleOp;
namespace bufferization {
struct OneShotBufferizationOptions;
/// Run One-Shot Module Bufferization on the given module. Performs a simple
/// function call analysis to determine which function arguments are
/// inplaceable. Then analyzes and bufferizes FuncOps one-by-one with One-Shot
/// Bufferize.
LogicalResult
runOneShotModuleBufferize(ModuleOp moduleOp,
bufferization::OneShotBufferizationOptions options);
} // namespace bufferization
} // namespace mlir
#endif // MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_ONESHOTMODULEBUFFERIZE_H

View File

@@ -200,6 +200,34 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
prints analysis results and explains why an OpOperand was decided to
bufferize out-of-place. This is useful for understanding why One-Shot
Bufferize chose to insert a certain buffer copy.
`bufferize-function-boundaries` is an experimental flag for bufferizing
`FuncOp`, `ReturnOp` and `CallOp`. This feature is still under development
and supports only simple cases at the moment. In particular:
* Recursive or circular function call graphs are not supported.
* If a newly allocated buffer is returned from a function (with
`allow-return-allocs`), the buffer will never be deallocated and will leak.
Such IR needs special handling, e.g., allocation hoisting or reference
counting.
* External functions (without bodies) that return a tensor are not
supported.
* Functions with multiple blocks or multiple ReturnOps are not supported.
One-Shot Bufferize implements the following contract around function calls:
The buffers of function arguments are always writable (unless annotated with
`bufferization.writable = false`). A buffer copy may be inserted at the call
site where necessary. Alias sets and equivalence info are propagated through
function calls. Whenever a function is bufferized, all functions that it
calls have already been analyzed and bufferized, so exact alias and
equivalence information is available. This is why recursive function calls
are not yet supported.
One-Shot Bufferize gathers additional information during the analysis phase
when function boundary bufferization is activated, e.g., whether a function
argument is read/written and which returned values are aliasing/equivalent.
For debugging purposes, such information can be printed with
`test-analysis-only`.
}];
let options = [
Option<"allowReturnAllocs", "allow-return-allocs", "bool",
@@ -211,6 +239,9 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
Option<"analysisFuzzerSeed", "analysis-fuzzer-seed", "unsigned",
/*default=*/"0",
"Test only: Analyze ops in random order with a given seed (fuzzer)">,
Option<"bufferizeFunctionBoundaries", "bufferize-function-boundaries",
"bool", /*default=*/"0",
"Bufferize function boundaries (experimental).">,
Option<"createDeallocs", "create-deallocs", "bool", /*default=*/"true",
"Specify if buffers should be deallocated. For compatibility with "
"core bufferization passes.">,

View File

@@ -1,4 +1,3 @@
add_subdirectory(ComprehensiveBufferize)
add_subdirectory(IR)
set(LLVM_TARGET_DEFINITIONS Passes.td)

View File

@@ -1,2 +0,0 @@
# no targets defined here

View File

@@ -1,43 +0,0 @@
//===- ModuleBufferization.h - Bufferization across Func. Boundaries ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_MODULEBUFFERIZATION_H
#define MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_MODULEBUFFERIZATION_H
#include <memory>
namespace mlir {
class DialectRegistry;
struct LogicalResult;
class ModuleOp;
namespace bufferization {
struct OneShotBufferizationOptions;
} // namespace bufferization
namespace linalg {
namespace comprehensive_bufferize {
/// Run Module Bufferization on the given module. Performs a simple function
/// call analysis to determine which function arguments are inplaceable. Then
/// analyzes and bufferizes FuncOps one-by-one with One-Shot Bufferize.
LogicalResult
runModuleBufferize(ModuleOp moduleOp,
bufferization::OneShotBufferizationOptions options);
namespace std_ext {
void registerModuleBufferizationExternalModels(DialectRegistry &registry);
} // namespace std_ext
} // namespace comprehensive_bufferize
} // namespace linalg
} // namespace mlir
#endif // MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_MODULEBUFFERIZATION_H

View File

@@ -22,6 +22,7 @@
#include "mlir/Dialect/ArmSVE/ArmSVEDialect.h"
#include "mlir/Dialect/Async/IR/Async.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Complex/IR/Complex.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
#include "mlir/Dialect/DLTI/DLTI.h"
@@ -46,6 +47,7 @@
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
#include "mlir/Dialect/Shape/IR/Shape.h"
#include "mlir/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h"
@@ -100,8 +102,11 @@ inline void registerAllDialects(DialectRegistry &registry) {
x86vector::X86VectorDialect>();
// clang-format on
arith::registerBufferizableOpInterfaceExternalModels(registry);
bufferization::func_ext::registerBufferizableOpInterfaceExternalModels(
registry);
linalg::registerBufferizableOpInterfaceExternalModels(registry);
scf::registerBufferizableOpInterfaceExternalModels(registry);
shape::registerBufferizableOpInterfaceExternalModels(registry);
tensor::registerBufferizableOpInterfaceExternalModels(registry);
tensor::registerInferTypeOpInterfaceExternalModels(registry);
tensor::registerTilingOpInterfaceExternalModels(registry);

View File

@@ -33,11 +33,6 @@ namespace bufferization {
using namespace mlir;
using namespace bufferization;
/// Attribute name used to mark the bufferization layout for region
/// arguments during linalg comprehensive bufferization.
constexpr const ::llvm::StringLiteral
bufferization::BufferizableOpInterface::kBufferLayoutAttrName;
/// Attribute name used to mark region arguments that can be bufferized
/// in-place during linalg comprehensive bufferization.
constexpr const ::llvm::StringLiteral

View File

@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/IR/FunctionInterfaces.h"
#include "mlir/Transforms/InliningUtils.h"
using namespace mlir;
@@ -14,6 +15,15 @@ using namespace mlir::bufferization;
#include "mlir/Dialect/Bufferization/IR/BufferizationOpsDialect.cpp.inc"
/// Attribute name used to mark function arguments whose buffers can be written
/// to during One-Shot Module Bufferize.
constexpr const ::llvm::StringLiteral BufferizationDialect::kWritableAttrName;
/// Attribute name used to mark the bufferization layout for region arguments
/// during One-Shot Module Bufferize.
constexpr const ::llvm::StringLiteral
BufferizationDialect::kBufferLayoutAttrName;
//===----------------------------------------------------------------------===//
// Bufferization Dialect Interfaces
//===----------------------------------------------------------------------===//
@@ -41,3 +51,33 @@ void mlir::bufferization::BufferizationDialect::initialize() {
>();
addInterfaces<BufferizationInlinerInterface>();
}
LogicalResult
BufferizationDialect::verifyOperationAttribute(Operation *op,
NamedAttribute attr) {
using bufferization::BufferizableOpInterface;
if (attr.getName() == kWritableAttrName) {
if (!attr.getValue().isa<BoolAttr>()) {
return op->emitError() << "'" << kWritableAttrName
<< "' is expected to be a boolean attribute";
}
if (!isa<FunctionOpInterface>(op))
return op->emitError() << "expected " << attr.getName()
<< " to be used on function-like operations";
return success();
}
if (attr.getName() == kBufferLayoutAttrName) {
if (!attr.getValue().isa<AffineMapAttr>()) {
return op->emitError() << "'" << kBufferLayoutAttrName
<< "' is expected to be a affine map attribute";
}
if (!isa<FunctionOpInterface>(op))
return op->emitError() << "expected " << attr.getName()
<< " to be used on function-like operations";
return success();
}
return op->emitError() << "attribute '" << attr.getName()
<< "' not supported by the bufferization dialect";
}

View File

@@ -12,6 +12,7 @@
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Operation.h"
@@ -178,8 +179,10 @@ struct OneShotBufferizePass
BufferizationOptions::OpFilterEntry::FilterFn filterFn =
[&](Operation *op) {
// Disallow non-func dialect ops. I.e., no ops related to function
-            // calls.
-            if (isa<func::FuncDialect>(op->getDialect()))
+            // calls. (Unless explicitly activated.)
+            bool isFuncBoundaryOp =
+                isa_and_nonnull<func::FuncDialect>(op->getDialect());
+            if (!this->bufferizeFunctionBoundaries && isFuncBoundaryOp)
return false;
// Filter may be specified via options.
if (this->dialectFilter.hasValue())
@@ -195,9 +198,16 @@ struct OneShotBufferizePass
}
ModuleOp moduleOp = getOperation();
-    if (failed(runOneShotBufferize(moduleOp, opt))) {
-      signalPassFailure();
-      return;
+    if (bufferizeFunctionBoundaries) {
+      if (failed(runOneShotModuleBufferize(moduleOp, opt))) {
+        signalPassFailure();
+        return;
+      }
+    } else {
+      if (failed(runOneShotBufferize(moduleOp, opt))) {
+        signalPassFailure();
+        return;
+      }
}
if (opt.testAnalysisOnly)

View File

@@ -4,7 +4,9 @@ add_mlir_dialect_library(MLIRBufferizationTransforms
BufferOptimizations.cpp
BufferResultsToOutParams.cpp
BufferUtils.cpp
FuncBufferizableOpInterfaceImpl.cpp
OneShotAnalysis.cpp
OneShotModuleBufferize.cpp
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Bufferization

View File

@@ -0,0 +1,542 @@
//===- BufferizableOpInterfaceImpl.cpp - Impl. of BufferizableOpInterface -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/Operation.h"
namespace mlir {
namespace bufferization {
namespace func_ext {
void FuncAnalysisState::startFunctionAnalysis(FuncOp funcOp) {
analyzedFuncOps[funcOp] = FuncOpAnalysisState::InProgress;
auto createdEquiv = equivalentFuncArgs.try_emplace(funcOp, IndexMapping());
auto createdAliasingOperands =
aliasingFuncArgs.try_emplace(funcOp, IndexToIndexListMapping());
auto createdAliasingResults =
aliasingReturnVals.try_emplace(funcOp, IndexToIndexListMapping());
auto createdRead = readBbArgs.try_emplace(funcOp, BbArgIndexSet());
auto createdWritten = writtenBbArgs.try_emplace(funcOp, BbArgIndexSet());
(void)createdEquiv;
(void)createdAliasingOperands;
(void)createdAliasingResults;
(void)createdRead;
(void)createdWritten;
#ifndef NDEBUG
assert(createdEquiv.second && "equivalence info exists already");
assert(createdAliasingOperands.second && "aliasing info exists already");
assert(createdAliasingResults.second && "aliasing info exists already");
assert(createdRead.second && "bbarg access info exists already");
assert(createdWritten.second && "bbarg access info exists already");
#endif // NDEBUG
}
/// Return the unique ReturnOp that terminates `funcOp`.
/// Return nullptr if there is no such unique ReturnOp.
static func::ReturnOp getAssumedUniqueReturnOp(FuncOp funcOp) {
func::ReturnOp returnOp;
for (Block &b : funcOp.getBody()) {
if (auto candidateOp = dyn_cast<func::ReturnOp>(b.getTerminator())) {
if (returnOp)
return nullptr;
returnOp = candidateOp;
}
}
return returnOp;
}
/// Return the index-th bufferized function argument type. This assumes that the
/// specified argument is a tensor. If the tensor is ranked, a layout map may be
/// specified by the user. If no layout map is specified, a fully dynamic map is
/// used.
static BaseMemRefType
getBufferizedFunctionArgType(FuncOp funcOp, int64_t index,
const BufferizationOptions &options) {
auto tensorType =
funcOp.getFunctionType().getInput(index).dyn_cast<TensorType>();
assert(tensorType && "expected TensorType");
BaseMemRefType memrefType = getMemRefType(tensorType, options);
auto layoutAttr = funcOp.getArgAttrOfType<AffineMapAttr>(
index, BufferizationDialect::kBufferLayoutAttrName);
if (!layoutAttr)
return memrefType;
auto rankedMemrefType = memrefType.dyn_cast<MemRefType>();
assert(rankedMemrefType && "buffer layout not supported on unranked tensors");
return MemRefType::get(
rankedMemrefType.getShape(), rankedMemrefType.getElementType(),
layoutAttr.getValue(), rankedMemrefType.getMemorySpaceAsInt());
}
/// Return the FuncOp called by `callOp`.
static FuncOp getCalledFunction(CallOpInterface callOp) {
SymbolRefAttr sym = callOp.getCallableForCallee().dyn_cast<SymbolRefAttr>();
if (!sym)
return nullptr;
return dyn_cast_or_null<FuncOp>(
SymbolTable::lookupNearestSymbolFrom(callOp, sym));
}
/// Get FuncAnalysisState.
static const FuncAnalysisState &
getFuncAnalysisState(const AnalysisState &state) {
Optional<const FuncAnalysisState *> maybeState =
state.getDialectState<FuncAnalysisState>(
func::FuncDialect::getDialectNamespace());
assert(maybeState.hasValue() && "FuncAnalysisState does not exist");
return **maybeState;
}
/// Return the state (phase) of analysis of the FuncOp.
static FuncOpAnalysisState getFuncOpAnalysisState(const AnalysisState &state,
FuncOp funcOp) {
const FuncAnalysisState &funcState = getFuncAnalysisState(state);
auto it = funcState.analyzedFuncOps.find(funcOp);
if (it == funcState.analyzedFuncOps.end())
return FuncOpAnalysisState::NotAnalyzed;
return it->second;
}
/// Return the index of the bbArg in the given FuncOp that is equivalent to the
/// specified return value (if any).
static Optional<int64_t> getEquivalentFuncArgIdx(FuncOp funcOp,
const FuncAnalysisState &state,
int64_t returnValIdx) {
auto funcOpIt = state.equivalentFuncArgs.find(funcOp);
if (funcOpIt == state.equivalentFuncArgs.end())
// No equivalence info stored for funcOp.
return None;
auto retValIt = funcOpIt->getSecond().find(returnValIdx);
if (retValIt == funcOpIt->getSecond().end())
// Return value has no equivalent bbArg.
return None;
return retValIt->getSecond();
}
struct CallOpInterface
: public BufferizableOpInterface::ExternalModel<CallOpInterface,
func::CallOp> {
bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {
func::CallOp callOp = cast<func::CallOp>(op);
FuncOp funcOp = getCalledFunction(callOp);
assert(funcOp && "expected CallOp to a FuncOp");
const FuncAnalysisState &funcState = getFuncAnalysisState(state);
if (getFuncOpAnalysisState(state, funcOp) != FuncOpAnalysisState::Analyzed)
// FuncOp not analyzed yet. Assume that OpOperand is read.
return true;
return funcState.readBbArgs.lookup(funcOp).contains(
opOperand.getOperandNumber());
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {
func::CallOp callOp = cast<func::CallOp>(op);
FuncOp funcOp = getCalledFunction(callOp);
assert(funcOp && "expected CallOp to a FuncOp");
const FuncAnalysisState &funcState = getFuncAnalysisState(state);
if (getFuncOpAnalysisState(state, funcOp) != FuncOpAnalysisState::Analyzed)
// FuncOp not analyzed yet. Assume that OpOperand is written.
return true;
return funcState.writtenBbArgs.lookup(funcOp).contains(
opOperand.getOperandNumber());
}
SmallVector<OpResult> getAliasingOpResult(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {
func::CallOp callOp = cast<func::CallOp>(op);
FuncOp funcOp = getCalledFunction(callOp);
assert(funcOp && "expected CallOp to a FuncOp");
const FuncAnalysisState &funcState = getFuncAnalysisState(state);
if (getFuncOpAnalysisState(state, funcOp) !=
FuncOpAnalysisState::Analyzed) {
// FuncOp not analyzed yet. Any OpResult may be aliasing.
SmallVector<OpResult> result;
for (OpResult opResult : op->getOpResults())
if (opResult.getType().isa<TensorType>())
result.push_back(opResult);
return result;
}
// Get aliasing results from state.
auto aliasingReturnVals =
funcState.aliasingReturnVals.lookup(funcOp).lookup(
opOperand.getOperandNumber());
SmallVector<OpResult> result;
for (int64_t resultIdx : aliasingReturnVals)
result.push_back(callOp->getOpResult(resultIdx));
return result;
}
SmallVector<OpOperand *>
getAliasingOpOperand(Operation *op, OpResult opResult,
const AnalysisState &state) const {
func::CallOp callOp = cast<func::CallOp>(op);
FuncOp funcOp = getCalledFunction(callOp);
assert(funcOp && "expected CallOp to a FuncOp");
const FuncAnalysisState &funcState = getFuncAnalysisState(state);
if (getFuncOpAnalysisState(state, funcOp) !=
FuncOpAnalysisState::Analyzed) {
// FuncOp not analyzed yet. Any OpOperand may be aliasing.
SmallVector<OpOperand *> result;
for (OpOperand &opOperand : op->getOpOperands())
if (opOperand.get().getType().isa<TensorType>())
result.push_back(&opOperand);
return result;
}
// Get aliasing bbArgs from state.
auto aliasingFuncArgs = funcState.aliasingFuncArgs.lookup(funcOp).lookup(
opResult.getResultNumber());
SmallVector<OpOperand *> result;
for (int64_t bbArgIdx : aliasingFuncArgs)
result.push_back(&callOp->getOpOperand(bbArgIdx));
return result;
}
BufferRelation bufferRelation(Operation *op, OpResult opResult,
const AnalysisState &state) const {
return BufferRelation::Equivalent;
}
/// All function arguments are writable. It is the responsibility of the
/// CallOp to insert buffer copies where necessary.
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
BufferizationState &state) const {
func::CallOp callOp = cast<func::CallOp>(op);
unsigned numResults = callOp.getNumResults();
unsigned numOperands = callOp->getNumOperands();
FuncOp funcOp = getCalledFunction(callOp);
assert(funcOp && "expected CallOp to a FuncOp");
FunctionType funcType = funcOp.getFunctionType();
const FuncAnalysisState &funcState =
getFuncAnalysisState(state.getAnalysisState());
const OneShotBufferizationOptions &options =
static_cast<const OneShotBufferizationOptions &>(state.getOptions());
// Result types of the bufferized CallOp.
SmallVector<Type> resultTypes;
// Replacement values for the existing CallOp. These are usually the results
// of the bufferized CallOp, unless a tensor result folds onto an operand.
SmallVector<Value> replacementValues(numResults, Value());
// For non-tensor results: A mapping from return val indices of the old
// CallOp to return val indices of the bufferized CallOp.
SmallVector<Optional<unsigned>> retValMapping(numResults, None);
// Operands of the bufferized CallOp.
SmallVector<Value> newOperands(numOperands, Value());
// Based on previously gathered equivalence information, we know if a
// tensor result folds onto an operand. These are the only tensor value
// results that are supported at the moment.
//
// For tensor return values that do not fold onto an operand, additional
// work is needed (TODO) to either:
// * hoist a result into an inplaceable operand or
// * devise a better representation to truly return a buffer.
//
// Note: If a function has no body, no equivalence information is
// available. Consequently, a tensor return value cannot be proven to fold
// onto a FuncOp bbArg, so calls to such functions are not bufferizable at
// the moment.
// 1. Compute the result types of the new CallOp. Tensor results that are
// equivalent to a FuncOp bbArg are no longer returned.
for (const auto &it : llvm::enumerate(callOp.getResultTypes())) {
unsigned returnValIdx = it.index();
Type returnType = it.value();
if (!returnType.isa<TensorType>()) {
// Non-tensor values are returned.
retValMapping[returnValIdx] = resultTypes.size();
resultTypes.push_back(returnType);
continue;
}
if (Optional<int64_t> bbArgIdx =
getEquivalentFuncArgIdx(funcOp, funcState, returnValIdx)) {
// Return operands that are equivalent to some bbArg are not
// returned.
FailureOr<Value> bufferOrFailure =
state.getBuffer(rewriter, callOp->getOpOperand(*bbArgIdx));
if (failed(bufferOrFailure))
return failure();
replacementValues[returnValIdx] = *bufferOrFailure;
newOperands[*bbArgIdx] = *bufferOrFailure;
continue;
}
if (!options.allowReturnAllocs)
return callOp->emitError(
"call to FuncOp that returns non-equivalent tensors not supported");
// Returning a memref. This memref is not equivalent to any bbArg. It is
// likely a newly allocated buffer. We may want to hoist such allocations
// to the call site in the future.
retValMapping[returnValIdx] = resultTypes.size();
resultTypes.push_back(funcType.getResult(resultTypes.size()));
}
// 2. Rewrite tensor operands as memrefs based on `bufferizedFuncType`.
for (OpOperand &opOperand : callOp->getOpOperands()) {
unsigned idx = opOperand.getOperandNumber();
Value tensorOperand = opOperand.get();
// Non-tensor operands are just copied.
if (!tensorOperand.getType().isa<TensorType>()) {
newOperands[idx] = tensorOperand;
continue;
}
// Retrieve buffers for tensor operands. Tensor operand buffers, whose
// corresponding FuncOp bbArgs are equivalent to a returned tensor, were
// already stored in `newOperands` during Step 1.
Value buffer = newOperands[idx];
if (!buffer) {
FailureOr<Value> bufferOrFailure = state.getBuffer(rewriter, opOperand);
if (failed(bufferOrFailure))
return failure();
buffer = *bufferOrFailure;
}
// Caller / callee type mismatch is handled with a CastOp.
auto memRefType = funcType.getInput(idx);
// Since we don't yet have a clear layout story, to_memref may
// conservatively turn tensors into more dynamic memrefs than necessary.
// If the callee's memref type does not match, introduce an extra memref.cast
// that will either canonicalize away or fail compilation until we can do
// something better.
if (buffer.getType() != memRefType) {
assert(
memref::CastOp::areCastCompatible(buffer.getType(), memRefType) &&
"CallOp::bufferize: cast incompatible");
Value castBuffer = rewriter.create<memref::CastOp>(callOp.getLoc(),
memRefType, buffer);
buffer = castBuffer;
}
newOperands[idx] = buffer;
}
// 3. Create the new CallOp.
Operation *newCallOp = rewriter.create<func::CallOp>(
callOp.getLoc(), funcOp.getSymName(), resultTypes, newOperands);
newCallOp->setAttrs(callOp->getAttrs());
// Get replacement values for non-tensor / non-equivalent results.
for (unsigned i = 0; i < replacementValues.size(); ++i) {
if (replacementValues[i])
continue;
replacementValues[i] = newCallOp->getResult(*retValMapping[i]);
}
// 4. Replace the old op with the new op.
replaceOpWithBufferizedValues(rewriter, callOp, replacementValues);
return success();
}
};
struct ReturnOpInterface
: public BufferizableOpInterface::ExternalModel<ReturnOpInterface,
func::ReturnOp> {
bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {
return true;
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {
return false;
}
SmallVector<OpResult> getAliasingOpResult(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {
return {};
}
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
BufferizationState &state) const {
#ifndef NDEBUG
auto returnOp = cast<func::ReturnOp>(op);
assert(isa<FuncOp>(returnOp->getParentOp()) &&
"only support FuncOp parent for ReturnOp");
#endif // NDEBUG
// ReturnOps are bufferized as part of FuncOps.
return failure();
}
};
struct FuncOpInterface
: public BufferizableOpInterface::ExternalModel<FuncOpInterface, FuncOp> {
/// Rewrite function bbArgs and return values into buffer form (using the
/// canonical memref layout for now). This function bufferizes the function
/// signature and the ReturnOp. When the entire function body has been
/// bufferized, function return types can be switched to more concise memref
/// types as part of `foldMemRefCasts`.
///
/// When a tensor function argument is known to be equivalent to a tensor
/// result, it is dropped from the return values.
///
/// All function bbArgs are writable unless they are explicitly marked as
/// read-only. Callers must insert copies when needed.
///
/// Note: Returning a memref is possible, but corresponding CallOp
/// bufferizations fail unless `allowReturnAllocs`.
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
BufferizationState &state) const {
auto funcOp = cast<FuncOp>(op);
FunctionType funcType = funcOp.getFunctionType();
const FuncAnalysisState &funcState =
getFuncAnalysisState(state.getAnalysisState());
const BufferizationOptions &options = state.getOptions();
// Construct the bufferized function type.
SmallVector<Type> argTypes;
for (const auto &it : llvm::enumerate(funcType.getInputs())) {
Type argType = it.value();
if (auto tensorType = argType.dyn_cast<TensorType>()) {
argTypes.push_back(
getBufferizedFunctionArgType(funcOp, it.index(), options));
continue;
}
argTypes.push_back(argType);
}
// Bodiless functions are assumed opaque and we cannot know the
// bufferization contract they want to enforce. As a consequence, only
// support functions that don't return any tensors at the moment.
if (funcOp.getBody().empty()) {
SmallVector<Type> retTypes;
for (Type resultType : funcType.getResults()) {
if (resultType.isa<TensorType>())
return funcOp->emitError() << "cannot bufferize bodiless function "
<< "that returns a tensor";
retTypes.push_back(resultType);
}
funcOp.setType(FunctionType::get(op->getContext(), argTypes, retTypes));
return success();
}
// TODO: Support functions with multiple returns.
func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp);
assert(returnOp && "expected func with single return op");
// 1. Rewrite the bbArgs. Turn every tensor bbArg into a memref bbArg.
Block &frontBlock = funcOp.getBody().front();
for (BlockArgument &bbArg : frontBlock.getArguments()) {
auto tensorType = bbArg.getType().dyn_cast<TensorType>();
// Non-tensor types stay the same.
if (!tensorType)
continue;
// Collect all uses of the bbArg.
SmallVector<OpOperand *> bbArgUses;
for (OpOperand &use : bbArg.getUses())
bbArgUses.push_back(&use);
// Change the bbArg type to memref.
Type memrefType =
getBufferizedFunctionArgType(funcOp, bbArg.getArgNumber(), options);
bbArg.setType(memrefType);
// Replace all uses of the original tensor bbArg.
rewriter.setInsertionPointToStart(&frontBlock);
if (!bbArgUses.empty()) {
// Insert to_tensor because the remaining function body has not been
// bufferized yet.
Value toTensorOp =
rewriter.create<bufferization::ToTensorOp>(funcOp.getLoc(), bbArg);
for (OpOperand *use : bbArgUses)
use->set(toTensorOp);
}
}
// 2. For each result, keep track of which inplace argument it reuses.
SmallVector<Value> returnValues;
for (OpOperand &returnOperand : returnOp->getOpOperands()) {
Value returnVal = returnOperand.get();
// If not a tensor type just forward it.
if (!returnVal.getType().isa<RankedTensorType>()) {
returnValues.push_back(returnVal);
continue;
}
// If return operand is equivalent to some bbArg, no need to return it.
if (Optional<int64_t> equivBbArgIdx = getEquivalentFuncArgIdx(
funcOp, funcState, returnOperand.getOperandNumber())) {
rewriter.setInsertionPoint(returnOp);
Location loc = returnOp.getLoc();
Value toMemrefOp = rewriter.create<bufferization::ToMemrefOp>(
loc, getMemRefType(returnVal.getType().cast<TensorType>(), options),
returnVal);
BlockArgument equivBbArg = funcOp.getArgument(*equivBbArgIdx);
// Note: This copy will fold away. It must be inserted here to ensure
// that `returnVal` still has at least one use and does not fold away.
if (failed(
createMemCpy(rewriter, loc, toMemrefOp, equivBbArg, options)))
return funcOp->emitError("could not generate copy for bbArg");
continue;
}
returnValues.push_back(*state.getBuffer(rewriter, returnOperand));
}
// 3. Rewrite the terminator without the in-place bufferizable values.
returnOp.operandsMutable().assign(returnValues);
// 4. Rewrite the FuncOp type to buffer form.
funcOp.setType(FunctionType::get(op->getContext(), argTypes,
ValueRange(returnValues).getTypes()));
return success();
}
/// Return `true` if the given function argument is writable.
bool isWritable(Operation *op, Value value,
const AnalysisState &state) const {
auto funcOp = cast<FuncOp>(op);
BlockArgument bbArg = value.dyn_cast<BlockArgument>();
assert(bbArg && "expected BlockArgument");
// "bufferization.writable" overrides other writability decisions. This is
// currently used for testing only.
if (BoolAttr writable = funcOp.getArgAttrOfType<BoolAttr>(
bbArg.getArgNumber(), BufferizationDialect::kWritableAttrName))
return writable.getValue();
// All function arguments are writable by default.
return true;
}
bool isAllocationHoistingBarrier(Operation *op) const { return true; }
};
} // namespace func_ext
} // namespace bufferization
} // namespace mlir
void mlir::bufferization::func_ext::
registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry) {
registry.addExtension(+[](MLIRContext *ctx, func::FuncDialect *dialect) {
func::CallOp::attachInterface<func_ext::CallOpInterface>(*ctx);
func::FuncOp::attachInterface<func_ext::FuncOpInterface>(*ctx);
func::ReturnOp::attachInterface<func_ext::ReturnOpInterface>(*ctx);
});
}
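
An editorial sketch of the result-folding logic above (names and types are illustrative, layout maps omitted): a call whose tensor result was proven equivalent to a tensor operand loses that result after bufferization, and the callee writes through the operand's buffer instead.

```
// Before bufferization (analysis: the call's result #0 is equivalent to
// operand #0):
//   %r = call @callee(%t) : (tensor<?xf32>) -> tensor<?xf32>
//
// After bufferization, the equivalent tensor result is dropped and the
// operand's buffer is passed and written to in place:
//   call @callee(%m) : (memref<?xf32>) -> ()
```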

View File

@@ -0,0 +1,497 @@
//===- ModuleBufferization.cpp - Bufferization across Func. Boundaries ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Module Bufferization is an extension of One-Shot Bufferize that
// bufferizes function boundaries. It provides `BufferizableOpInterface`
// implementations for FuncOp, CallOp and ReturnOp.
//
// Module Bufferization is run via `runOneShotModuleBufferize(ModuleOp, ...)`.
// This function analyzes the given module and determines the order of analysis
// and bufferization: Functions that are called are processed before their
// respective callers.
//
// After analyzing a FuncOp, additional information about its bbArgs is
// gathered through PostAnalysisStepFns and stored in `FuncAnalysisState`.
//
// * `aliasingFuncOpBBArgsAnalysis` determines the equivalent/aliasing bbArgs
//   for each tensor return value (if any).
// * `funcOpBbArgReadWriteAnalysis` determines whether or not a tensor bbArg is
// read/written.
//
// Only tensors that are equivalent to some FuncOp bbArg may be returned.
// Bufferization currently fails if other tensors (in particular tensors that
// bufferize out-of-place and result in a new buffer allocation) are returned.
// In the future, such allocations could be hoisted to the caller.
//
// Example: `foo` fails bufferization because %0 is not equivalent to any bbArg.
// ```
// func @foo() -> tensor<?xf32> {
// %0 = linalg.init_tensor [...] : tensor<?xf32>
// return %0 : tensor<?xf32>
// }
// ```
//
// Module Bufferization implements the following calling convention.
//
// * In the absence of conflicts within a FuncOp, the FuncOp's bbArgs may always
// be written to in-place.
// * If a tensor operand of a CallOp is read after the CallOp, the operand of
// the CallOp must bufferize out-of-place.
//
// Example: The tensor.insert op bufferizes in-place because it is allowed to
// modify the buffer of `%t1` directly. The CallOp in `caller` must bufferize
// out-of-place because `%t0` is modified by the callee but read by the
// tensor.extract op. The analysis of CallOps decides whether an OpOperand must
// bufferize out-of-place based on results of `funcOpBbArgReadWriteAnalysis`.
// ```
// func @callee(%t1 : tensor<?xf32>) -> tensor<?xf32> {
// %f = ... : f32
// %0 = tensor.insert %f into %t1[...] : tensor<?xf32>
// return %0 : tensor<?xf32>
// }
//
// func @caller() -> () {
// %t0 = ... : tensor<?xf32>
// %1 = call @callee(%t0) : (tensor<?xf32>) -> (tensor<?xf32>)
// %2 = tensor.extract %1[...] : tensor<?xf32>
// }
// ```
//
// Note: If a function is external, `funcOpBbArgReadWriteAnalysis` cannot
// analyze the function body. In such a case, the CallOp analysis conservatively
// assumes that each tensor OpOperand is both read and written.
//
// TODO: Add FuncOp attributes so that bbArgs of external FuncOps can be marked
// as "not reading" and/or "not writing".
#include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h"
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
#include "mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/Operation.h"
using namespace mlir;
using namespace mlir::bufferization;
using namespace mlir::bufferization::func_ext;
/// A mapping of FuncOps to their callers.
using FuncCallerMap = DenseMap<func::FuncOp, DenseSet<Operation *>>;
/// Get FuncAnalysisState.
static const FuncAnalysisState &
getFuncAnalysisState(const AnalysisState &state) {
Optional<const FuncAnalysisState *> maybeState =
state.getDialectState<FuncAnalysisState>(
func::FuncDialect::getDialectNamespace());
assert(maybeState.hasValue() && "FuncAnalysisState does not exist");
return **maybeState;
}
/// Get or create FuncAnalysisState.
static FuncAnalysisState &getFuncAnalysisState(AnalysisState &state) {
return state.getOrCreateDialectState<FuncAnalysisState>(
func::FuncDialect::getDialectNamespace());
}
/// Return the state (phase) of analysis of the FuncOp.
static FuncOpAnalysisState getFuncOpAnalysisState(const AnalysisState &state,
func::FuncOp funcOp) {
const FuncAnalysisState &funcState = getFuncAnalysisState(state);
auto it = funcState.analyzedFuncOps.find(funcOp);
if (it == funcState.analyzedFuncOps.end())
return FuncOpAnalysisState::NotAnalyzed;
return it->second;
}
/// Return the unique ReturnOp that terminates `funcOp`.
/// Return nullptr if there is no such unique ReturnOp.
static func::ReturnOp getAssumedUniqueReturnOp(func::FuncOp funcOp) {
func::ReturnOp returnOp;
for (Block &b : funcOp.getBody()) {
if (auto candidateOp = dyn_cast<func::ReturnOp>(b.getTerminator())) {
if (returnOp)
return nullptr;
returnOp = candidateOp;
}
}
return returnOp;
}
namespace {
/// Annotate IR with the results of the analysis. For testing purposes only.
static void annotateEquivalentReturnBbArg(OpOperand &returnVal,
BlockArgument bbArg) {
const char *kEquivalentArgsAttr = "__equivalent_func_args__";
Operation *op = returnVal.getOwner();
SmallVector<int64_t> equivBbArgs;
if (op->hasAttr(kEquivalentArgsAttr)) {
auto attr = op->getAttr(kEquivalentArgsAttr).cast<ArrayAttr>();
equivBbArgs = llvm::to_vector<4>(llvm::map_range(attr, [](Attribute a) {
return a.cast<IntegerAttr>().getValue().getSExtValue();
}));
} else {
equivBbArgs.append(op->getNumOperands(), -1);
}
equivBbArgs[returnVal.getOperandNumber()] = bbArg.getArgNumber();
OpBuilder b(op->getContext());
op->setAttr(kEquivalentArgsAttr, b.getI64ArrayAttr(equivBbArgs));
}
/// Store function BlockArguments that are equivalent to/aliasing a returned
/// value in FuncAnalysisState.
static LogicalResult
aliasingFuncOpBBArgsAnalysis(Operation *op, AnalysisState &state,
BufferizationAliasInfo &aliasInfo,
SmallVector<Operation *> &newOps) {
FuncAnalysisState &funcState = getFuncAnalysisState(state);
// Support only single return-terminated block in the function.
auto funcOp = cast<func::FuncOp>(op);
func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp);
assert(returnOp && "expected func with single return op");
for (OpOperand &returnVal : returnOp->getOpOperands())
if (returnVal.get().getType().isa<RankedTensorType>())
for (BlockArgument bbArg : funcOp.getArguments())
if (bbArg.getType().isa<RankedTensorType>()) {
int64_t returnIdx = returnVal.getOperandNumber();
int64_t bbArgIdx = bbArg.getArgNumber();
if (aliasInfo.areEquivalentBufferizedValues(returnVal.get(), bbArg)) {
funcState.equivalentFuncArgs[funcOp][returnIdx] = bbArgIdx;
if (state.getOptions().testAnalysisOnly)
annotateEquivalentReturnBbArg(returnVal, bbArg);
}
if (aliasInfo.areAliasingBufferizedValues(returnVal.get(), bbArg)) {
funcState.aliasingFuncArgs[funcOp][returnIdx].push_back(bbArgIdx);
funcState.aliasingReturnVals[funcOp][bbArgIdx].push_back(returnIdx);
}
}
return success();
}
/// Return true if the buffer of the given tensor value is written to. Must not
/// be called for values inside not yet analyzed functions. (Post-analysis
/// steps do not have to be run yet, i.e., "in progress" is also OK.)
static bool isValueWritten(Value value, const AnalysisState &state,
const BufferizationAliasInfo &aliasInfo) {
#ifndef NDEBUG
assert(value.getType().isa<TensorType>() && "expected TensorType");
func::FuncOp funcOp;
if (auto bbArg = value.dyn_cast<BlockArgument>()) {
Operation *owner = bbArg.getOwner()->getParentOp();
funcOp = isa<func::FuncOp>(owner) ? cast<func::FuncOp>(owner)
: owner->getParentOfType<func::FuncOp>();
} else {
funcOp = value.getDefiningOp()->getParentOfType<func::FuncOp>();
}
assert(getFuncOpAnalysisState(state, funcOp) !=
FuncOpAnalysisState::NotAnalyzed &&
"FuncOp must be fully analyzed or analysis in progress");
#endif // NDEBUG
bool isWritten = false;
aliasInfo.applyOnAliases(value, [&](Value val) {
for (OpOperand &use : val.getUses())
if (state.isInPlace(use) && state.bufferizesToMemoryWrite(use))
isWritten = true;
});
return isWritten;
}
static void annotateFuncArgAccess(func::FuncOp funcOp, BlockArgument bbArg,
bool isRead, bool isWritten) {
OpBuilder b(funcOp.getContext());
Attribute accessType;
if (isRead && isWritten) {
accessType = b.getStringAttr("read-write");
} else if (isRead) {
accessType = b.getStringAttr("read");
} else if (isWritten) {
accessType = b.getStringAttr("write");
} else {
accessType = b.getStringAttr("none");
}
funcOp.setArgAttr(bbArg.getArgNumber(), "bufferization.access", accessType);
}
/// Determine which FuncOp bbArgs are read and which are written. If this
/// PostAnalysisStepFn is run on a function with unknown ops, it will
/// conservatively assume that such ops bufferize to a read + write.
static LogicalResult
funcOpBbArgReadWriteAnalysis(Operation *op, AnalysisState &state,
BufferizationAliasInfo &aliasInfo,
SmallVector<Operation *> &newOps) {
FuncAnalysisState &funcState = getFuncAnalysisState(state);
auto funcOp = cast<func::FuncOp>(op);
// If the function has no body, conservatively assume that all args are
// read + written.
if (funcOp.getBody().empty()) {
for (BlockArgument bbArg : funcOp.getArguments()) {
funcState.readBbArgs[funcOp].insert(bbArg.getArgNumber());
funcState.writtenBbArgs[funcOp].insert(bbArg.getArgNumber());
}
return success();
}
for (BlockArgument bbArg : funcOp.getArguments()) {
if (!bbArg.getType().isa<TensorType>())
continue;
bool isRead = state.isValueRead(bbArg);
bool isWritten = isValueWritten(bbArg, state, aliasInfo);
if (state.getOptions().testAnalysisOnly)
annotateFuncArgAccess(funcOp, bbArg, isRead, isWritten);
if (isRead)
funcState.readBbArgs[funcOp].insert(bbArg.getArgNumber());
if (isWritten)
funcState.writtenBbArgs[funcOp].insert(bbArg.getArgNumber());
}
return success();
}
} // namespace
/// Remove bufferization attributes on FuncOp arguments.
static void removeBufferizationAttributes(BlockArgument bbArg) {
auto funcOp = cast<func::FuncOp>(bbArg.getOwner()->getParentOp());
funcOp.removeArgAttr(bbArg.getArgNumber(),
BufferizationDialect::kBufferLayoutAttrName);
funcOp.removeArgAttr(bbArg.getArgNumber(),
BufferizationDialect::kWritableAttrName);
}
/// Return the func::FuncOp called by `callOp`.
static func::FuncOp getCalledFunction(CallOpInterface callOp) {
SymbolRefAttr sym = callOp.getCallableForCallee().dyn_cast<SymbolRefAttr>();
if (!sym)
return nullptr;
return dyn_cast_or_null<func::FuncOp>(
SymbolTable::lookupNearestSymbolFrom(callOp, sym));
}
/// Gather equivalence info of CallOps.
/// Note: This only adds new equivalence info if the called function was already
/// analyzed.
// TODO: This does not handle cyclic function call graphs etc.
static void equivalenceAnalysis(func::FuncOp funcOp,
BufferizationAliasInfo &aliasInfo,
FuncAnalysisState &funcState) {
funcOp->walk([&](func::CallOp callOp) {
func::FuncOp calledFunction = getCalledFunction(callOp);
assert(calledFunction && "could not retrieved called func::FuncOp");
// No equivalence info available for the called function.
if (!funcState.equivalentFuncArgs.count(calledFunction))
return WalkResult::skip();
for (auto it : funcState.equivalentFuncArgs[calledFunction]) {
int64_t returnIdx = it.first;
int64_t bbargIdx = it.second;
Value returnVal = callOp.getResult(returnIdx);
Value argVal = callOp->getOperand(bbargIdx);
aliasInfo.unionEquivalenceClasses(returnVal, argVal);
}
return WalkResult::advance();
});
}
/// Store all functions of the `moduleOp` in `orderedFuncOps`, sorted by
/// callee-caller order (i.e. callees without callers first).
/// Store the map of FuncOp to all its callers in `callerMap`.
/// Return `failure()` if a cycle of calls is detected or if we are unable to
/// retrieve the called FuncOp from any CallOpInterface.
static LogicalResult
getFuncOpsOrderedByCalls(ModuleOp moduleOp,
SmallVectorImpl<func::FuncOp> &orderedFuncOps,
FuncCallerMap &callerMap) {
// For each FuncOp, the set of functions called by it (i.e. the union of
// symbols of all nested CallOpInterface ops).
DenseMap<func::FuncOp, DenseSet<func::FuncOp>> calledBy;
// For each FuncOp, the number of CallOpInterface it contains.
DenseMap<func::FuncOp, unsigned> numberCallOpsContainedInFuncOp;
WalkResult res = moduleOp.walk([&](func::FuncOp funcOp) -> WalkResult {
if (!funcOp.getBody().empty()) {
func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp);
if (!returnOp)
return funcOp->emitError()
<< "cannot bufferize a FuncOp with tensors and "
"without a unique ReturnOp";
}
numberCallOpsContainedInFuncOp[funcOp] = 0;
return funcOp.walk([&](CallOpInterface callOp) -> WalkResult {
// Only support CallOp for now.
if (!isa<func::CallOp>(callOp.getOperation()))
return callOp->emitError() << "expected a CallOp";
func::FuncOp calledFunction = getCalledFunction(callOp);
assert(calledFunction && "could not retrieved called func::FuncOp");
auto it = callerMap.try_emplace(calledFunction, DenseSet<Operation *>{});
it.first->getSecond().insert(callOp);
if (calledBy[calledFunction].count(funcOp) == 0) {
calledBy[calledFunction].insert(funcOp);
numberCallOpsContainedInFuncOp[funcOp]++;
}
return WalkResult::advance();
});
});
if (res.wasInterrupted())
return failure();
// Iteratively remove function operations that do not call any of the
// functions remaining in the callCounter map and add them to the worklist.
while (!numberCallOpsContainedInFuncOp.empty()) {
auto it = llvm::find_if(numberCallOpsContainedInFuncOp,
[](auto entry) { return entry.getSecond() == 0; });
if (it == numberCallOpsContainedInFuncOp.end())
return moduleOp.emitOpError(
"expected callgraph to be free of circular dependencies.");
orderedFuncOps.push_back(it->getFirst());
for (auto callee : calledBy[it->getFirst()])
numberCallOpsContainedInFuncOp[callee]--;
numberCallOpsContainedInFuncOp.erase(it);
}
return success();
}
/// Set the attribute that triggers inplace bufferization on a FuncOp argument
/// `bbArg`.
static void setInPlaceFuncArgument(BlockArgument bbArg, bool inPlace) {
auto funcOp = cast<func::FuncOp>(bbArg.getOwner()->getParentOp());
funcOp.setArgAttr(bbArg.getArgNumber(),
BufferizableOpInterface::kInplaceableAttrName,
BoolAttr::get(bbArg.getContext(), inPlace));
}
/// Annotate the IR with the result of the analysis. For testing/debugging only.
static void annotateOpsWithBufferizationMarkers(func::FuncOp funcOp,
const AnalysisState &state) {
auto bufferizableOp = cast<BufferizableOpInterface>(funcOp.getOperation());
for (BlockArgument bbArg : funcOp.getArguments())
if (bbArg.getType().isa<TensorType>())
setInPlaceFuncArgument(bbArg, bufferizableOp.isWritable(bbArg, state));
}
/// Fold return values that are memref casts and update function return types.
///
/// During FuncOp bufferization, the exact type of the returned memrefs (if any)
/// is not known yet. Therefore, the bufferization uses memref types with the
/// most generic layout map as function return types. After bufferizing the
/// entire function body, a more concise memref type can potentially be used for
/// the return type of the function.
static void foldMemRefCasts(func::FuncOp funcOp) {
if (funcOp.getBody().empty())
return;
func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp);
SmallVector<Type> resultTypes;
for (OpOperand &operand : returnOp->getOpOperands()) {
if (auto castOp = operand.get().getDefiningOp<memref::CastOp>()) {
operand.set(castOp.source());
resultTypes.push_back(castOp.source().getType());
} else {
resultTypes.push_back(operand.get().getType());
}
}
auto newFuncType = FunctionType::get(
funcOp.getContext(), funcOp.getFunctionType().getInputs(), resultTypes);
funcOp.setType(newFuncType);
}
LogicalResult mlir::bufferization::runOneShotModuleBufferize(
ModuleOp moduleOp, OneShotBufferizationOptions options) {
IRRewriter rewriter(moduleOp.getContext());
OneShotAnalysisState analysisState(moduleOp, options);
BufferizationState bufferizationState(analysisState);
FuncAnalysisState &funcState = getFuncAnalysisState(analysisState);
BufferizationAliasInfo &aliasInfo = analysisState.getAliasInfo();
// A list of functions in the order in which they are analyzed + bufferized.
SmallVector<func::FuncOp> orderedFuncOps;
// A mapping of FuncOps to their callers.
FuncCallerMap callerMap;
if (failed(getFuncOpsOrderedByCalls(moduleOp, orderedFuncOps, callerMap)))
return failure();
// Collect bbArg/return value information after the analysis.
options.addPostAnalysisStep(aliasingFuncOpBBArgsAnalysis);
options.addPostAnalysisStep(funcOpBbArgReadWriteAnalysis);
// Analyze ops.
for (func::FuncOp funcOp : orderedFuncOps) {
// No body => no analysis.
if (funcOp.getBody().empty())
continue;
// Now analyzing function.
funcState.startFunctionAnalysis(funcOp);
// Gather equivalence info for CallOps.
equivalenceAnalysis(funcOp, aliasInfo, funcState);
// Analyze funcOp.
if (failed(analyzeOp(funcOp, analysisState)))
return failure();
// Mark op as fully analyzed.
funcState.analyzedFuncOps[funcOp] = FuncOpAnalysisState::Analyzed;
// Add annotations to function arguments.
if (options.testAnalysisOnly)
annotateOpsWithBufferizationMarkers(funcOp, analysisState);
}
if (options.testAnalysisOnly)
return success();
// Bufferize functions.
for (func::FuncOp funcOp : orderedFuncOps) {
// Note: It would be good to apply cleanups here but we cannot as aliasInfo
// would be invalidated.
if (failed(bufferizeOp(funcOp, bufferizationState)))
return failure();
foldMemRefCasts(funcOp);
}
// Check result.
for (func::FuncOp funcOp : orderedFuncOps) {
if (!options.allowReturnAllocs &&
llvm::any_of(funcOp.getFunctionType().getResults(), [](Type t) {
return t.isa<MemRefType, UnrankedMemRefType>();
})) {
funcOp->emitError("memref return type is unsupported");
return failure();
}
}
// Finalize all buffers.
if (failed(finalizeBuffers(moduleOp, options)))
return failure();
// Post-pass cleanup of function argument attributes.
moduleOp.walk([&](func::FuncOp op) {
for (BlockArgument bbArg : op.getArguments())
removeBufferizationAttributes(bbArg);
});
return success();
}
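
For reference, an editorial sketch of the markers emitted under `test-analysis-only` by the two post-analysis steps and `annotateOpsWithBufferizationMarkers` above (attribute values are illustrative, the function is made up):

```
func.func @callee(%t: tensor<?xf32> {bufferization.access = "write",
                                     linalg.inplaceable = true},
                  %f: f32) -> tensor<?xf32> {
  %0 = linalg.fill ins(%f : f32) outs(%t : tensor<?xf32>) -> tensor<?xf32>
  // Return operand #0 was found equivalent to bbArg #0.
  return {__equivalent_func_args__ = [0]} %0 : tensor<?xf32>
}
```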

View File

@@ -1,5 +1,4 @@
add_subdirectory(Analysis)
add_subdirectory(ComprehensiveBufferize)
add_subdirectory(IR)
add_subdirectory(Transforms)
add_subdirectory(Utils)

View File

@@ -1,11 +0,0 @@
add_mlir_dialect_library(MLIRModuleBufferization
ModuleBufferization.cpp
LINK_LIBS PUBLIC
MLIRBufferization
MLIRBufferizationTransforms
MLIRFunc
MLIRFuncTransforms
MLIRIR
MLIRMemRef
)

View File

@@ -133,17 +133,6 @@ LogicalResult LinalgDialect::verifyOperationAttribute(Operation *op,
<< " to be used on function-like operations";
return success();
}
if (attr.getName() == BufferizableOpInterface::kBufferLayoutAttrName) {
if (!attr.getValue().isa<AffineMapAttr>()) {
return op->emitError()
<< "'" << BufferizableOpInterface::kBufferLayoutAttrName
<< "' is expected to be a affine map attribute";
}
if (!isa<FunctionOpInterface>(op))
return op->emitError() << "expected " << attr.getName()
<< " to be used on function-like operations";
return success();
}
if (attr.getName() == LinalgDialect::kMemoizedIndexingMapsAttrName)
return success();
return op->emitError() << "attribute '" << attr.getName()

View File

@@ -40,6 +40,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms
MLIRArithmetic
MLIRArithmeticTransforms
MLIRBufferization
MLIRBufferizationTransforms
MLIRComplex
MLIRFunc
MLIRFuncToLLVM
@@ -50,7 +51,6 @@ add_mlir_dialect_library(MLIRLinalgTransforms
MLIRLinalg
MLIRLinalgAnalysis
MLIRLinalgUtils
MLIRModuleBufferization
MLIRSCF
MLIRSCFTransforms
MLIRSCFUtils

View File

@@ -11,10 +11,11 @@
#include "mlir/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/SCF/BufferizableOpInterfaceImpl.h"
@@ -28,7 +29,6 @@
using namespace mlir;
using namespace mlir::bufferization;
using namespace mlir::linalg;
using namespace mlir::linalg::comprehensive_bufferize;
namespace {
struct LinalgComprehensiveModuleBufferize
@@ -55,7 +55,7 @@ struct LinalgComprehensiveModuleBufferize
bufferization::registerAllocationOpInterfaceExternalModels(registry);
linalg::registerBufferizableOpInterfaceExternalModels(registry);
scf::registerBufferizableOpInterfaceExternalModels(registry);
std_ext::registerModuleBufferizationExternalModels(registry);
func_ext::registerBufferizableOpInterfaceExternalModels(registry);
tensor::registerBufferizableOpInterfaceExternalModels(registry);
vector::registerBufferizableOpInterfaceExternalModels(registry);
}
@@ -109,7 +109,7 @@ void LinalgComprehensiveModuleBufferize::runOnOperation() {
ModuleOp moduleOp = getOperation();
applyEnablingTransformations(moduleOp);
if (failed(runModuleBufferize(moduleOp, opt))) {
if (failed(runOneShotModuleBufferize(moduleOp, opt))) {
signalPassFailure();
return;
}

View File

@@ -204,8 +204,8 @@ func.func @simple_tensor_test(%t1 : tensor<?xf32>, %f : f32) -> tensor<?xf32> {
// -----
// CHECK-SCF-LABEL: func @simple_scf_if(
// CHECK-SCF-SAME: %[[t1:.*]]: tensor<?xf32> {linalg.inplaceable = true}, %[[c:.*]]: i1, %[[pos:.*]]: index
func.func @simple_scf_if(%t1: tensor<?xf32> {linalg.inplaceable = true}, %c: i1, %pos: index, %f: f32)
// CHECK-SCF-SAME: %[[t1:.*]]: tensor<?xf32> {bufferization.writable = true}, %[[c:.*]]: i1, %[[pos:.*]]: index
func.func @simple_scf_if(%t1: tensor<?xf32> {bufferization.writable = true}, %c: i1, %pos: index, %f: f32)
-> (tensor<?xf32>, index) {
// CHECK-SCF: %[[r:.*]] = scf.if %[[c]] -> (memref<?xf32, #{{.*}}>) {
%r1, %r2 = scf.if %c -> (tensor<?xf32>, index) {

View File

@@ -1,12 +1,12 @@
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize=allow-return-allocs -split-input-file | FileCheck %s
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs" -split-input-file | FileCheck %s
// Run fuzzer with different seeds.
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs fully-dynamic-layout-maps=0" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs fully-dynamic-layout-maps=0" -split-input-file -o /dev/null
// Make sure that the returned buffer is not deallocated.
// TODO: Such buffers currently leak. We need buffer hoisting / ref counting for

View File

@@ -1,9 +1,12 @@
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s
// Run fuzzer with different seeds.
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="test-analysis-only allow-return-allocs analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="test-analysis-only allow-return-allocs analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="test-analysis-only allow-return-allocs analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// TODO: Extract op-specific test cases and move them to their respective
// dialects.
//===----------------------------------------------------------------------===//
// Simple cases
@@ -12,10 +15,10 @@
// -----
// CHECK-LABEL: func @extract_slice_fun(
func.func @extract_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = false},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<?xf32> {linalg.inplaceable = true})
// CHECK-SAME: bufferization.access = "read"
func.func @extract_slice_fun(%A : tensor<?xf32> {bufferization.writable = false},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "read"
-> (tensor<4xf32>, tensor<8xf32>)
{
// tensor.extract_slice is not used in a write, it is not compelled to
@@ -36,12 +39,12 @@ func.func @extract_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = false},
// -----
// CHECK-LABEL: func @insert_slice_fun(
func.func @insert_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = false},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<?xf32> {linalg.inplaceable = true},
// CHECK-SAME: bufferization.access = "read-write"
%C : tensor<4xf32> {linalg.inplaceable = false})
// CHECK-SAME: bufferization.access = "read"
func.func @insert_slice_fun(%A : tensor<?xf32> {bufferization.writable = false},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read-write"
%C : tensor<4xf32> {bufferization.writable = false})
// CHECK-SAME: bufferization.access = "read"
-> (tensor<?xf32>, tensor<?xf32>)
{
// must bufferize out of place.
@@ -62,10 +65,10 @@ func.func @insert_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = false},
// -----
// CHECK-LABEL: func @conflict_on_B(
func.func @conflict_on_B(%A : tensor<4x4xf32> {linalg.inplaceable = true},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<4x4xf32> {linalg.inplaceable = true})
// CHECK-SAME: bufferization.access = "read-write"
func.func @conflict_on_B(%A : tensor<4x4xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<4x4xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "read-write"
-> (tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>)
{
// matmul output operand interferes with input operand.
@@ -102,9 +105,9 @@ func.func @conflict_on_B(%A : tensor<4x4xf32> {linalg.inplaceable = true},
// CHECK-LABEL: func @extract_slice_extract_slice(
func.func @extract_slice_extract_slice(
%A : tensor<?xf32> {linalg.inplaceable = true},
%A : tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<?xf32> {linalg.inplaceable = false})
%B : tensor<?xf32> {bufferization.writable = false})
// CHECK-SAME: bufferization.access = "read"
-> (tensor<2xf32>, tensor<2xf32>)
{
@@ -131,17 +134,17 @@ func.func @extract_slice_extract_slice(
// CHECK-LABEL: func @insert_slice_insert_slice(
func.func @insert_slice_insert_slice(
%A : tensor<?xf32> {linalg.inplaceable = true},
%A : tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read-write"
%A2 : tensor<4xf32> {linalg.inplaceable = true},
%A2 : tensor<4xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read-write"
%A3 : tensor<2xf32> {linalg.inplaceable = true},
%A3 : tensor<2xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<?xf32> {linalg.inplaceable = false},
%B : tensor<?xf32> {bufferization.writable = false},
// CHECK-SAME: bufferization.access = "read"
%B2 : tensor<4xf32> {linalg.inplaceable = false},
%B2 : tensor<4xf32> {bufferization.writable = false},
// CHECK-SAME: bufferization.access = "read"
%B3 : tensor<2xf32> {linalg.inplaceable = false})
%B3 : tensor<2xf32> {bufferization.writable = false})
// CHECK-SAME: bufferization.access = "read"
-> (tensor<?xf32>, tensor<?xf32>)
{
@@ -166,8 +169,8 @@ func.func @insert_slice_insert_slice(
// CHECK-LABEL: func @extract_slice_nonmatching_insert_slice
func.func @extract_slice_nonmatching_insert_slice(
%A : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32> {linalg.inplaceable = false},
%A : tensor<?xf32> {bufferization.writable = true},
%B : tensor<?xf32> {bufferization.writable = false},
%idx: index)
-> (tensor<?xf32>, tensor<?xf32>)
{
@@ -205,8 +208,8 @@ func.func @extract_slice_nonmatching_insert_slice(
// CHECK-LABEL: func @extract_slice_matching_insert_slice
func.func @extract_slice_matching_insert_slice(
%A : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32> {linalg.inplaceable = false})
%A : tensor<?xf32> {bufferization.writable = true},
%B : tensor<?xf32> {bufferization.writable = false})
-> (tensor<?xf32>, tensor<?xf32>)
{
// %r1 bufferizes inplace because %A is inplaceable.
@@ -243,7 +246,7 @@ func.func @extract_slice_matching_insert_slice(
// CHECK-LABEL: @read_of_matching_insert_slice_source
func.func @read_of_matching_insert_slice_source(
%A : tensor<?xf32> {linalg.inplaceable = true},
%A : tensor<?xf32> {bufferization.writable = true},
%idx : index,
%idx2 : index)
-> (tensor<?xf32>, vector<5xf32>)
@@ -274,7 +277,7 @@ func.func @read_of_matching_insert_slice_source(
// CHECK-LABEL: @read_of_matching_insert_slice_source_interleaved
func.func @read_of_matching_insert_slice_source_interleaved(
%A : tensor<?xf32> {linalg.inplaceable = true},
%A : tensor<?xf32> {bufferization.writable = true},
%idx : index,
%idx2 : index,
%idx3 : index)
@@ -318,9 +321,9 @@ func.func @read_of_matching_insert_slice_source_interleaved(
// CHECK-LABEL: func @extract_slice_linalg_readonly_use
func.func @extract_slice_linalg_readonly_use(
%A : tensor<?x?xf32> {linalg.inplaceable = false},
%B : tensor<4x4xf32> {linalg.inplaceable = false},
%C : tensor<4x4xf32> {linalg.inplaceable = true})
%A : tensor<?x?xf32> {bufferization.writable = false},
%B : tensor<4x4xf32> {bufferization.writable = false},
%C : tensor<4x4xf32> {bufferization.writable = true})
-> (tensor<4x4xf32>, tensor<4x4xf32>)
{
// tensor.extract_slice is only used as a read, no interference irrespective
@@ -352,9 +355,9 @@ func.func @extract_slice_linalg_readonly_use(
// CHECK-LABEL: func @extract_slice_to_linalg_write_use
func.func @extract_slice_to_linalg_write_use(
%A : tensor<4x4xf32> {linalg.inplaceable = false},
%B : tensor<?x?xf32> {linalg.inplaceable = false},
%C : tensor<?x?xf32> {linalg.inplaceable = true})
%A : tensor<4x4xf32> {bufferization.writable = false},
%B : tensor<?x?xf32> {bufferization.writable = false},
%C : tensor<?x?xf32> {bufferization.writable = true})
-> (tensor<4x4xf32>, tensor<4x4xf32>)
{
// Step 4. %sB forward propagates to a write in %D but it is not inplace.
@@ -396,9 +399,9 @@ func.func @insert_slice_double_extract_slice(
%s2: index,
%s3: index,
%s4: index,
%A: tensor<8x6xf32> {linalg.inplaceable = false},
%B: tensor<6x6xf32> {linalg.inplaceable = false},
%C: tensor<30x20xf32> {linalg.inplaceable = true})
%A: tensor<8x6xf32> {bufferization.writable = false},
%B: tensor<6x6xf32> {bufferization.writable = false},
%C: tensor<30x20xf32> {bufferization.writable = true})
-> tensor<30x20xf32>
{
// CHECK: tensor.extract_slice
@@ -430,9 +433,9 @@ func.func @insert_slice_double_extract_slice(
// CHECK-LABEL: func @extract_slice_to_linalg_write_use
func.func @extract_slice_to_linalg_write_use(
%A : tensor<4x4xf32> {linalg.inplaceable = false},
%B : tensor<?x?xf32> {linalg.inplaceable = false},
%C : tensor<?x?xf32> {linalg.inplaceable = true})
%A : tensor<4x4xf32> {bufferization.writable = false},
%B : tensor<?x?xf32> {bufferization.writable = false},
%C : tensor<?x?xf32> {bufferization.writable = true})
-> (tensor<4x4xf32>, tensor<4x4xf32>)
{
// Step 4. %sB forward propagates to an inplace write in %D.
@@ -472,9 +475,9 @@ func.func @extract_slice_to_linalg_write_use(
// CHECK-LABEL: func @nested_extract_slice_and_insert
func.func @nested_extract_slice_and_insert(
%A : tensor<?x?xf32> {linalg.inplaceable = false},
%B : tensor<?x?xf32> {linalg.inplaceable = true},
%C : tensor<?x?xf32> {linalg.inplaceable = true},
%A : tensor<?x?xf32> {bufferization.writable = false},
%B : tensor<?x?xf32> {bufferization.writable = true},
%C : tensor<?x?xf32> {bufferization.writable = true},
%idx : index,
%sz1 : index,
%sz2 : index)
@@ -564,8 +567,8 @@ func.func @nested_extract_slice_and_insert(
// CHECK-LABEL: func @scf_for_yield_only
func.func @scf_for_yield_only(
%A : tensor<?xf32> {linalg.inplaceable = false},
%B : tensor<?xf32> {linalg.inplaceable = true},
%A : tensor<?xf32> {bufferization.writable = false},
%B : tensor<?xf32> {bufferization.writable = true},
%lb : index,
%ub : index,
%step : index)
@@ -596,9 +599,9 @@ func.func @scf_for_yield_only(
// CHECK-LABEL: func @scf_for_with_tensor.insert_slice
func.func @scf_for_with_tensor.insert_slice(
%A : tensor<?xf32> {linalg.inplaceable = false},
%B : tensor<?xf32> {linalg.inplaceable = true},
%C : tensor<4xf32> {linalg.inplaceable = false},
%A : tensor<?xf32> {bufferization.writable = false},
%B : tensor<?xf32> {bufferization.writable = true},
%C : tensor<4xf32> {bufferization.writable = false},
%lb : index,
%ub : index,
%step : index)
@@ -634,8 +637,8 @@ func.func private @some_use(tensor<?xf32>) -> ()
// CHECK-LABEL: func @scf_for_deps
func.func @scf_for_deps(
%A : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32> {linalg.inplaceable = true},
%A : tensor<?xf32> {bufferization.writable = true},
%B : tensor<?xf32> {bufferization.writable = true},
%lb : index,
%ub : index,
%step : index)
@@ -680,7 +683,7 @@ func.func @scf_for_deps(
func.func private @foo(tensor<64xf32>)
// CHECK-LABEL: dependence_through_call
func.func @dependence_through_call(%I : tensor<64xf32> {linalg.inplaceable = true}) {
func.func @dependence_through_call(%I : tensor<64xf32> {bufferization.writable = true}) {
%f1 = arith.constant 1.000000e+00 : f32
%f2 = arith.constant 2.000000e+00 : f32
@@ -712,8 +715,8 @@ func.func private @bar(%A : tensor<64xf32>) {
}
func.func @read_dependence_through_scf_and_call(
%I : tensor<64xf32> {linalg.inplaceable = true},
%I2 : tensor<64xf32> {linalg.inplaceable = true}) {
%I : tensor<64xf32> {bufferization.writable = true},
%I2 : tensor<64xf32> {bufferization.writable = true}) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
@@ -784,10 +787,10 @@ func.func @write_into_constant_via_alias(%v : vector<5xi32>,
// -----
func.func @matmul_on_tensors(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
func @matmul_on_tensors(
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -822,10 +825,10 @@ func.func @matmul_on_tensors(
// -----
func.func @matmul_on_tensors(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
func @matmul_on_tensors(
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -878,11 +881,11 @@ func.func @matmul_on_tensors(
func.func @insert_slice_chain(
%v1: vector<32x90xf32>,
%v2: vector<30x90xf32>,
%arg0: tensor<62x126xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg0: tensor<62x126xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
// CHECK-SAME: bufferization.access = "none"
%arg1: tensor<126x90xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<126x90xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
// CHECK-SAME: bufferization.access = "none"
%arg2: tensor<62x90xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg2: tensor<62x90xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
// CHECK-SAME: bufferization.access = "write"
-> tensor<62x90xf32> attributes {passthrough = [["target-cpu", "skylake-avx512"], ["prefer-vector-width", "512"]]}
{
@@ -926,7 +929,7 @@ func.func @insert_slice_chain(
// Only test IR validity wrt dominance.
// CHECK-LABEL: func @ip
func.func @ip(%t: tensor<10x20xf32> {linalg.inplaceable = true},
func.func @ip(%t: tensor<10x20xf32> {bufferization.writable = true},
%x: index, %y: index, %v: vector<5x6xf32>)
-> tensor<10x20xf32>
{
@@ -960,9 +963,9 @@ func.func @ip(%t: tensor<10x20xf32> {linalg.inplaceable = true},
// CHECK-LABEL: func @linalg_op_same_out_tensors(
func.func @linalg_op_same_out_tensors(
%t1: tensor<?xf32> {linalg.inplaceable = true},
%t1: tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
%t2: tensor<?xf32> {linalg.inplaceable = true})
%t2: tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "write"
-> (tensor<?xf32>, tensor<?xf32>){
@@ -994,9 +997,9 @@ func.func @linalg_op_same_out_tensors(
// CHECK-LABEL: func @linalg_op_same_out_tensors_2(
func.func @linalg_op_same_out_tensors_2(
%t1: tensor<?xf32> {linalg.inplaceable = true},
%t1: tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
%t2: tensor<?xf32> {linalg.inplaceable = true})
%t2: tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "write"
-> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>){
@@ -1020,7 +1023,7 @@ func.func @linalg_op_same_out_tensors_2(
func.func @double_insert_slice_into_alias(
%v1: vector<32x90xf32>,
%v2: vector<30x90xf32>,
%arg2: tensor<62x90xf32> {linalg.inplaceable = true},
%arg2: tensor<62x90xf32> {bufferization.writable = true},
%s1: index, %s2: index, %s3: index, %s4: index)
-> (tensor<62x90xf32>, tensor<?x?xf32>)
{
@@ -1061,7 +1064,7 @@ func.func @double_insert_slice_into_alias(
// CHECK-LABEL: func @interleaved_extract_insert_slice_chain_1
func.func @interleaved_extract_insert_slice_chain_1(
%arg2: tensor<62x90xf32> {linalg.inplaceable = true})
%arg2: tensor<62x90xf32> {bufferization.writable = true})
-> (tensor<62x90xf32>)
{
// CHECK: tensor.extract_slice
@@ -1092,7 +1095,7 @@ func.func @interleaved_extract_insert_slice_chain_1(
// CHECK-LABEL: func @interleaved_extract_insert_slice_chain_2
func.func @interleaved_extract_insert_slice_chain_2(
%arg2: tensor<62x90xf32> {linalg.inplaceable = true})
%arg2: tensor<62x90xf32> {bufferization.writable = true})
-> (tensor<62x90xf32>)
{
// CHECK: tensor.extract_slice
@@ -1123,7 +1126,7 @@ func.func @interleaved_extract_insert_slice_chain_2(
// CHECK-LABEL: func @extract_once_insert_twice
func.func @extract_once_insert_twice(
%arg2: tensor<62x90xf32> {linalg.inplaceable = true})
%arg2: tensor<62x90xf32> {bufferization.writable = true})
-> (tensor<62x90xf32>)
{
// CHECK: tensor.extract_slice
@@ -1154,8 +1157,8 @@ func.func @extract_once_insert_twice(
}
// CHECK-LABEL: func @reading_scf_for
func.func @reading_scf_for(%t1: tensor<?xf32> {linalg.inplaceable = true},
%s: index, %v: vector<5xf32>) -> (tensor<?xf32>, vector<5xf32>) {
func.func @reading_scf_for(%t1: tensor<?xf32> {bufferization.writable = true},
%s: index, %v: vector<5xf32>) -> (tensor<?xf32>, vector<5xf32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -1201,8 +1204,8 @@ func.func @reading_scf_for(%t1: tensor<?xf32> {linalg.inplaceable = true},
}
// CHECK-LABEL: func @non_reading_scf_for
func.func @non_reading_scf_for(%t1: tensor<?xf32> {linalg.inplaceable = true},
%s: index, %v: vector<5xf32>) -> (tensor<?xf32>, vector<5xf32>) {
func.func @non_reading_scf_for(%t1: tensor<?xf32> {bufferization.writable = true},
%s: index, %v: vector<5xf32>) -> (tensor<?xf32>, vector<5xf32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -1250,9 +1253,9 @@ func.func @non_reading_scf_for(%t1: tensor<?xf32> {linalg.inplaceable = true},
// This example passes analysis, but it fails when bufferizing.
// CHECK-LABEL: func @scf_if_inplace1
func.func @scf_if_inplace1(%t1: tensor<?xf32> {linalg.inplaceable = true},
%t2: tensor<?xf32> {linalg.inplaceable = true},
%cond: i1) -> tensor<?xf32> {
func.func @scf_if_inplace1(%t1: tensor<?xf32> {bufferization.writable = true},
%t2: tensor<?xf32> {bufferization.writable = true},
%cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
@@ -1268,9 +1271,9 @@ func.func @scf_if_inplace1(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_inplace2
func.func @scf_if_inplace2(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1) -> tensor<?xf32> {
func.func @scf_if_inplace2(%t1: tensor<?xf32> {bufferization.writable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
@@ -1289,9 +1292,9 @@ func.func @scf_if_inplace2(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_inplace3
func.func @scf_if_inplace3(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index,
%cond: i1) -> tensor<?xf32> {
func.func @scf_if_inplace3(%t1: tensor<?xf32> {bufferization.writable = true},
%v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index,
%cond: i1) -> tensor<?xf32> {
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
%e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
@@ -1317,9 +1320,9 @@ func.func @scf_if_inplace3(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_in_place4
func.func @scf_if_in_place4(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1, %cond2: i1) -> (tensor<?xf32>, vector<10xf32>) {
func.func @scf_if_in_place4(%t1: tensor<?xf32> {bufferization.writable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1, %cond2: i1) -> (tensor<?xf32>, vector<10xf32>) {
%cst = arith.constant 0.0 : f32
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: scf.yield
@@ -1353,8 +1356,8 @@ func.func @scf_if_in_place4(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_inplace5
func.func @scf_if_inplace5(%t1: tensor<?xf32> {linalg.inplaceable = true},
%idx: index, %cond: i1) -> tensor<?xf32> {
func.func @scf_if_inplace5(%t1: tensor<?xf32> {bufferization.writable = true},
%idx: index, %cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
@@ -1385,10 +1388,10 @@ func.func @scf_if_inplace5(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_inplace6
func.func @scf_if_inplace6(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v1: vector<5xf32>, %v2: vector<5xf32>,
%v3: vector<5xf32>, %idx: index,
%cond: i1, %cond2: i1) -> tensor<?xf32> {
func.func @scf_if_inplace6(%t1: tensor<?xf32> {bufferization.writable = true},
%v1: vector<5xf32>, %v2: vector<5xf32>,
%v3: vector<5xf32>, %idx: index,
%cond: i1, %cond2: i1) -> tensor<?xf32> {
// Test nested scf.if ops.
%r = scf.if %cond -> (tensor<?xf32>) {
%t2 = scf.if %cond2 -> (tensor<?xf32>) {
@@ -1426,9 +1429,9 @@ func.func @scf_if_inplace6(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_inplace7
func.func @scf_if_inplace7(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index,
%idx2: index, %cond: i1) -> (tensor<?xf32>, vector<5xf32>) {
func.func @scf_if_inplace7(%t1: tensor<?xf32> {bufferization.writable = true},
%v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index,
%idx2: index, %cond: i1) -> (tensor<?xf32>, vector<5xf32>) {
%cst = arith.constant 0.0 : f32
%r, %v_r2 = scf.if %cond -> (tensor<?xf32>, vector<5xf32>) {
// CHECK: vector.transfer_write
@@ -1456,9 +1459,9 @@ func.func @scf_if_inplace7(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_out_of_place1a
func.func @scf_if_out_of_place1a(%t1: tensor<?xf32> {linalg.inplaceable = true},
%idx: index, %idx2: index,
%cond: i1) -> tensor<?xf32> {
func.func @scf_if_out_of_place1a(%t1: tensor<?xf32> {bufferization.writable = true},
%idx: index, %idx2: index,
%cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
@@ -1483,9 +1486,9 @@ func.func @scf_if_out_of_place1a(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_out_of_place1b
func.func @scf_if_out_of_place1b(%t1: tensor<?xf32> {linalg.inplaceable = true},
%idx: index, %idx2: index, %idx3: index,
%cond: i1) -> tensor<?xf32> {
func.func @scf_if_out_of_place1b(%t1: tensor<?xf32> {bufferization.writable = true},
%idx: index, %idx2: index, %idx3: index,
%cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
@@ -1519,8 +1522,8 @@ func.func @scf_if_out_of_place1b(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_out_of_place1c
func.func @scf_if_out_of_place1c(%t1: tensor<?xf32> {linalg.inplaceable = true},
%idx: index, %idx2: index, %cond: i1) -> tensor<?xf32> {
func.func @scf_if_out_of_place1c(%t1: tensor<?xf32> {bufferization.writable = true},
%idx: index, %idx2: index, %cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
@@ -1550,9 +1553,9 @@ func.func @scf_if_out_of_place1c(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_out_of_place2
func.func @scf_if_out_of_place2(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1) -> (tensor<?xf32>, vector<10xf32>) {
func.func @scf_if_out_of_place2(%t1: tensor<?xf32> {bufferization.writable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1) -> (tensor<?xf32>, vector<10xf32>) {
%cst = arith.constant 0.0 : f32
%r = scf.if %cond -> (tensor<?xf32>) {
scf.yield %t1 : tensor<?xf32>
@@ -1574,9 +1577,9 @@ func.func @scf_if_out_of_place2(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @scf_if_out_of_place3
func.func @scf_if_out_of_place3(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1, %cond2: i1) -> (tensor<?xf32>, vector<10xf32>) {
func.func @scf_if_out_of_place3(%t1: tensor<?xf32> {bufferization.writable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1, %cond2: i1) -> (tensor<?xf32>, vector<10xf32>) {
%cst = arith.constant 0.0 : f32
%r = scf.if %cond -> (tensor<?xf32>) {
scf.yield %t1 : tensor<?xf32>
@@ -1605,8 +1608,8 @@ func.func @scf_if_out_of_place3(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
// CHECK-LABEL: func @some_use
func.func @some_use(%A : tensor<?xf32> {linalg.inplaceable = true},
%v : vector<5xf32>) -> (tensor<?xf32>) {
func.func @some_use(%A : tensor<?xf32> {bufferization.writable = true},
%v : vector<5xf32>) -> (tensor<?xf32>) {
%idx = arith.constant 0 : index
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
@@ -1616,8 +1619,8 @@ func.func @some_use(%A : tensor<?xf32> {linalg.inplaceable = true},
// CHECK-LABEL: func @main_func
func.func @main_func(%A : tensor<?xf32> {linalg.inplaceable = true},
%v : vector<5xf32>) -> (tensor<?xf32>) {
func.func @main_func(%A : tensor<?xf32> {bufferization.writable = true},
%v : vector<5xf32>) -> (tensor<?xf32>) {
// CHECK: call
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]
%0 = call @some_use(%A, %v) : (tensor<?xf32>, vector<5xf32>) -> (tensor<?xf32>)
@@ -1646,9 +1649,9 @@ func.func @to_tensor_op_not_writable(%m: memref<?xf32>, %v: vector<5xf32>,
// -----
// CHECK-LABEL: func @to_memref_op_is_reading
func.func @to_memref_op_is_reading(%t1: tensor<?xf32> {linalg.inplaceable = true},
%idx1: index, %idx2: index, %idx3: index,
%v1: vector<5xf32>)
func.func @to_memref_op_is_reading(%t1: tensor<?xf32> {bufferization.writable = true},
%idx1: index, %idx2: index, %idx3: index,
%v1: vector<5xf32>)
-> (vector<5xf32>, vector<5xf32>) {
// Write + read to/from tensor.
// CHECK: vector.transfer_write
@@ -1711,8 +1714,8 @@ func.func @equivalent_func_arg_2(%c0: index, %c10: index, %c1: index, %t0: tenso
// CHECK-LABEL: func @write_after_select_read_one
// CHECK-SAME: %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func.func @write_after_select_read_one(
%t1 : tensor<?xf32> {linalg.inplaceable = true},
%t2 : tensor<?xf32> {linalg.inplaceable = true},
%t1 : tensor<?xf32> {bufferization.writable = true},
%t2 : tensor<?xf32> {bufferization.writable = true},
%c : i1)
-> (f32, tensor<?xf32>)
{
@@ -1737,8 +1740,8 @@ func.func @write_after_select_read_one(
// CHECK-LABEL: func @write_after_select_read_both
// CHECK-SAME: %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func.func @write_after_select_read_both(
%t1 : tensor<?xf32> {linalg.inplaceable = true},
%t2 : tensor<?xf32> {linalg.inplaceable = true},
%t1 : tensor<?xf32> {bufferization.writable = true},
%t2 : tensor<?xf32> {bufferization.writable = true},
%c : i1)
-> (f32, f32, tensor<?xf32>)
{
@@ -1766,8 +1769,8 @@ func.func @write_after_select_read_both(
// CHECK-LABEL: func @write_after_select_no_conflict
// CHECK-SAME: %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func.func @write_after_select_no_conflict(
%t1 : tensor<?xf32> {linalg.inplaceable = true},
%t2 : tensor<?xf32> {linalg.inplaceable = true},
%t1 : tensor<?xf32> {bufferization.writable = true},
%t2 : tensor<?xf32> {bufferization.writable = true},
%c : i1)
-> (f32, tensor<?xf32>)
{

View File

@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize -split-input-file -verify-diagnostics
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="bufferize-function-boundaries=1" -split-input-file -verify-diagnostics
func.func private @foo() -> tensor<?xf32>
@@ -37,7 +37,7 @@ func.func @swappy(%cond1 : i1, %cond2 : i1, %t1 : tensor<f32>, %t2 : tensor<f32>
// -----
func.func @scf_if_not_equivalent(
%cond: i1, %t1: tensor<?xf32> {linalg.inplaceable = true},
%cond: i1, %t1: tensor<?xf32> {bufferization.writable = true},
%idx: index) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
scf.yield %t1 : tensor<?xf32>
@@ -54,7 +54,7 @@ func.func @scf_if_not_equivalent(
// -----
func.func @scf_if_not_aliasing(
%cond: i1, %t1: tensor<?xf32> {linalg.inplaceable = true},
%cond: i1, %t1: tensor<?xf32> {bufferization.writable = true},
%idx: index) -> f32 {
%r = scf.if %cond -> (tensor<?xf32>) {
scf.yield %t1 : tensor<?xf32>
@@ -85,7 +85,7 @@ func.func @bar() {
// -----
func.func @scf_for(%A : tensor<?xf32>,
%B : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32> {bufferization.writable = true},
%C : tensor<4xf32>,
%lb : index, %ub : index, %step : index)
-> (f32, f32)
@@ -110,14 +110,14 @@ func.func @scf_for(%A : tensor<?xf32>,
// -----
func.func private @fun_with_side_effects(%A: tensor<?xf32> {linalg.inplaceable = true})
func.func private @fun_with_side_effects(%A: tensor<?xf32> {bufferization.writable = true})
func.func @foo(%A: tensor<?xf32> {linalg.inplaceable = true}) -> (tensor<?xf32>) {
func.func @foo(%A: tensor<?xf32> {bufferization.writable = true}) -> (tensor<?xf32>) {
call @fun_with_side_effects(%A) : (tensor<?xf32>) -> ()
return %A: tensor<?xf32>
}
func.func @scf_yield_needs_copy(%A : tensor<?xf32> {linalg.inplaceable = true}, %iters : index) {
func.func @scf_yield_needs_copy(%A : tensor<?xf32> {bufferization.writable = true}, %iters : index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%res = scf.for %arg0 = %c0 to %iters step %c1 iter_args(%bbarg = %A) -> (tensor<?xf32>) {
@@ -131,7 +131,7 @@ func.func @scf_yield_needs_copy(%A : tensor<?xf32> {linalg.inplaceable = true},
// -----
func.func @extract_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = true})
func.func @extract_slice_fun(%A : tensor<?xf32> {bufferization.writable = true})
-> tensor<4xf32>
{
// This bufferizes to a pattern that the cross-function boundary pass needs to
@@ -184,6 +184,7 @@ func.func @mini_test_case1() -> tensor<10x20xf32> {
func.func @main() -> tensor<4xi32> {
%r = scf.execute_region -> tensor<4xi32> {
%A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
// expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}}
scf.yield %A: tensor<4xi32>
}
@@ -194,7 +195,7 @@ func.func @main() -> tensor<4xi32> {
// -----
func.func @to_memref_op_is_writing(
%t1: tensor<?xf32> {linalg.inplaceable = true}, %idx1: index,
%t1: tensor<?xf32> {bufferization.writable = true}, %idx1: index,
%idx2: index, %idx3: index, %v1: vector<5xf32>) -> (vector<5xf32>, vector<5xf32>) {
// This is a RaW conflict because to_memref is an inplace write and %t1 is
// read further down. This will likely have to change with partial

View File

@@ -1,12 +1,12 @@
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize -split-input-file | FileCheck %s
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1" -split-input-file | FileCheck %s
// Run fuzzer with different seeds.
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs fully-dynamic-layout-maps=0" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs fully-dynamic-layout-maps=0" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP-LABEL
// Bufferization of bodiless function with no tensor return value.
@@ -38,7 +38,7 @@ func.func private @private_func(tensor<?xf32>) -> (f32)
// CHECK-NOT: alloc
// CHECK-NOT: copy
// CHECK: call @private_func(%[[t]])
func.func @main(%t: tensor<?xf32> {linalg.inplaceable = true}) -> (f32) {
func.func @main(%t: tensor<?xf32> {bufferization.writable = true}) -> (f32) {
%0 = call @private_func(%t) : (tensor<?xf32>) -> (f32)
return %0 : f32
}
@@ -57,7 +57,7 @@ func.func private @private_func(tensor<?xf32>) -> (f32)
// CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
// CHECK: call @private_func(%[[casted]])
// CHECK: memref.dealloc %[[alloc]]
func.func @main(%t: tensor<?xf32> {linalg.inplaceable = false}) -> (f32) {
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
%0 = call @private_func(%t) : (tensor<?xf32>) -> (f32)
return %0 : f32
}
@@ -106,7 +106,7 @@ func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
// CHECK-LABEL: func @call_func_with_non_tensor_return(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @call_func_with_non_tensor_return(
%t0: tensor<?xf32> {linalg.inplaceable = true}) -> (f32, tensor<?xf32>) {
%t0: tensor<?xf32> {bufferization.writable = true}) -> (f32, tensor<?xf32>) {
// CHECK-NOT: alloc
// CHECK-NOT: copy
// CHECK: %[[call:.*]] = call @inner_func(%[[arg0]])
@@ -138,7 +138,7 @@ func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
// CHECK-LABEL: func @call_func_with_non_tensor_return(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @call_func_with_non_tensor_return(
%t0: tensor<?xf32> {linalg.inplaceable = false}) -> (f32, tensor<?xf32>) {
%t0: tensor<?xf32> {bufferization.writable = false}) -> (f32, tensor<?xf32>) {
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK-DAG: memref.copy %[[arg0]], %[[alloc]]
// CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
@@ -184,7 +184,7 @@ func.func @f2(%t: tensor<?xf32>) -> (f32) {
// CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
// CHECK: call @f2(%[[casted]])
// CHECK: memref.dealloc %[[alloc]]
func.func @main(%t: tensor<?xf32> {linalg.inplaceable = false}) -> (f32) {
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
%0 = call @f2(%t) : (tensor<?xf32>) -> (f32)
return %0 : f32
}
@@ -211,7 +211,7 @@ func.func @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> {
// CHECK: call @does_not_read(%[[casted]])
// CHECK: %[[r:.*]] = memref.load %[[alloc]]
// CHECK: memref.dealloc %[[alloc]]
func.func @main(%t: tensor<?xf32> {linalg.inplaceable = false}) -> f32 {
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> f32 {
%0 = call @does_not_read(%t) : (tensor<?xf32>) -> (tensor<?xf32>)
%idx = arith.constant 4 : index
%r = tensor.extract %0[%idx] : tensor<?xf32>
@@ -344,9 +344,9 @@ func.func @scf_for_with_tensor_insert_slice(
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$DYN_1D_MAP]]>
func.func @bar(
%A : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32> {linalg.inplaceable = true},
%C : tensor<4xf32> {linalg.inplaceable = true},
%A : tensor<?xf32> {bufferization.writable = true},
%B : tensor<?xf32> {bufferization.writable = true},
%C : tensor<4xf32> {bufferization.writable = true},
%lb : index, %ub : index, %step : index)
-> (tensor<?xf32>, tensor<?xf32>)
{
@@ -447,9 +447,10 @@ func.func private @external_func(tensor<?xf32>)
// CHECK-SAME: %[[A:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[B:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
// CHECK-SAME: %[[C:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
func.func @callee(%A : tensor<?xf32> {linalg.buffer_layout = affine_map<(i)[s0, s1] -> (i)>},
%B : tensor<?xf32>,
%C : tensor<?xf32>) {
func.func @callee(
%A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>},
%B : tensor<?xf32>,
%C : tensor<?xf32>) {
// CHECK-NEXT: %[[CASTED:.*]] = memref.cast %[[A]] : memref<?xf32> to memref<?xf32, #[[$DYNAMIC]]>
// CHECK-NEXT: call @external_func(%[[CASTED]]) : (memref<?xf32, #[[$DYNAMIC]]>) -> ()
call @external_func(%A) : (tensor<?xf32>) -> ()
@@ -467,9 +468,9 @@ func.func @callee(%A : tensor<?xf32> {linalg.buffer_layout = affine_map<(i)[s0,
// CHECK-SAME: %[[A:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[B:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[C:[0-9a-zA-Z]*]]: memref<?xf32, #[[$DYNAMIC]]>
func.func @entry(%A : tensor<?xf32> {linalg.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, linalg.inplaceable = false},
%B : tensor<?xf32> {linalg.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, linalg.inplaceable = false},
%C : tensor<?xf32> {linalg.inplaceable = false}) {
func.func @entry(%A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, bufferization.writable = false},
%B : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, bufferization.writable = false},
%C : tensor<?xf32> {bufferization.writable = false}) {
// Note: `callee` does not write to its bbArg directly, but `external_func`
// does. Inside `callee`, the writes via `external_func` do not cause a
// conflict. However, inside `entry`, the writes do cause a conflict because
@@ -505,8 +506,8 @@ func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
// CHECK-LABEL: func @equivalent_func_arg(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @equivalent_func_arg(%t0: tensor<?xf32> {linalg.inplaceable = true},
%c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
func.func @equivalent_func_arg(%t0: tensor<?xf32> {bufferization.writable = true},
%c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
// CHECK-NOT: alloc
// CHECK-NOT: copy
%1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
@@ -534,8 +535,8 @@ func.func @inner_func_2(%t: tensor<?xf32>) -> tensor<?xf32> {
// CHECK-LABEL: func @equivalent_func_arg_2(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @equivalent_func_arg_2(%t0: tensor<?xf32> {linalg.inplaceable = true},
%c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
func.func @equivalent_func_arg_2(%t0: tensor<?xf32> {bufferization.writable = true},
%c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
// CHECK: scf.for {{.*}} {
%1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
// CHECK: %[[alloc:.*]] = memref.alloc
@@ -549,3 +550,23 @@ func.func @equivalent_func_arg_2(%t0: tensor<?xf32> {linalg.inplaceable = true},
}
return %1: tensor<?xf32>
}
// -----
// Bufferize without fully dynamic layout maps.
// CHECK-LABEL: func @transfer_read(%{{.*}}: memref<?xf32, #map>) -> vector<4xf32> {
// CHECK-NO-LAYOUT-MAP-LABEL: func @transfer_read(%{{.*}}: memref<?xf32>) -> vector<4xf32>
func.func @transfer_read(
%A : tensor<?xf32> {bufferization.writable = false})
-> (vector<4xf32>)
{
%c0 = arith.constant 0 : index
%f0 = arith.constant 0.0 : f32
// CHECK: %[[RES:.*]] = vector.transfer_read {{.*}} : memref<?xf32, #{{.*}}>, vector<4xf32>
%0 = vector.transfer_read %A[%c0], %f0 : tensor<?xf32>, vector<4xf32>
// CHECK: return %[[RES]] : vector<4xf32>
return %0 : vector<4xf32>
}

View File

@@ -7,9 +7,9 @@
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1234(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -34,9 +34,9 @@ func.func @fill_extract_matmul_1234(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1243(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -60,9 +60,10 @@ func.func @fill_extract_matmul_1243(
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1324(%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
func.func @fill_extract_matmul_1324(
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -86,9 +87,10 @@ func.func @fill_extract_matmul_1324(%arg0: tensor<518x518xf32> {linalg.buffer_la
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1342(%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
func.func @fill_extract_matmul_1342(
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -112,9 +114,10 @@ func.func @fill_extract_matmul_1342(%arg0: tensor<518x518xf32> {linalg.buffer_la
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1423(%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
func.func @fill_extract_matmul_1423(
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -138,9 +141,10 @@ func.func @fill_extract_matmul_1423(%arg0: tensor<518x518xf32> {linalg.buffer_la
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1432(%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
func.func @fill_extract_matmul_1432(
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -165,9 +169,9 @@ func.func @fill_extract_matmul_1432(%arg0: tensor<518x518xf32> {linalg.buffer_la
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2134(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -192,9 +196,9 @@ func.func @fill_extract_matmul_2134(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2143(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -219,9 +223,9 @@ func.func @fill_extract_matmul_2143(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2314(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -246,9 +250,9 @@ func.func @fill_extract_matmul_2314(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2341(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -273,9 +277,9 @@ func.func @fill_extract_matmul_2341(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2413(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -300,9 +304,9 @@ func.func @fill_extract_matmul_2413(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2431(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -327,9 +331,9 @@ func.func @fill_extract_matmul_2431(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3124(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -354,9 +358,9 @@ func.func @fill_extract_matmul_3124(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3142(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -381,10 +385,9 @@ func.func @fill_extract_matmul_3142(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3214(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true}) -> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
@@ -408,9 +411,9 @@ func.func @fill_extract_matmul_3214(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3241(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -435,9 +438,9 @@ func.func @fill_extract_matmul_3241(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3412(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -462,9 +465,9 @@ func.func @fill_extract_matmul_3412(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3421(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -489,9 +492,9 @@ func.func @fill_extract_matmul_3421(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_4123(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -516,9 +519,9 @@ func.func @fill_extract_matmul_4123(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_4132(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -543,9 +546,9 @@ func.func @fill_extract_matmul_4132(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_4213(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -570,9 +573,9 @@ func.func @fill_extract_matmul_4213(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_4231(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -597,9 +600,9 @@ func.func @fill_extract_matmul_4231(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_4312(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
@@ -624,9 +627,9 @@ func.func @fill_extract_matmul_4312(
// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_4321(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
%arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index


@@ -3,9 +3,9 @@
// CHECK-LABEL: func @linalg_op_bufferizes_inplace_with_input
// CHECK-SAME: %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
func.func @linalg_op_bufferizes_inplace_with_input(
%t1: tensor<?x?xf32> {linalg.inplaceable = true},
%t2: tensor<?xf32> {linalg.inplaceable = false},
%t3: tensor<?x?xf32> {linalg.inplaceable = false},
%t1: tensor<?x?xf32> {bufferization.writable = true},
%t2: tensor<?xf32> {bufferization.writable = false},
%t3: tensor<?x?xf32> {bufferization.writable = false},
%s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
// CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t1]] : {{.*}})
%r = linalg.generic {
@@ -27,9 +27,9 @@ func.func @linalg_op_bufferizes_inplace_with_input(
// CHECK-LABEL: func @linalg_op_bufferizes_out_of_place_with_input
// CHECK-SAME: %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
func.func @linalg_op_bufferizes_out_of_place_with_input(
%t1: tensor<?x?xf32> {linalg.inplaceable = false},
%t2: tensor<?xf32> {linalg.inplaceable = false},
%t3: tensor<?x?xf32> {linalg.inplaceable = false},
%t1: tensor<?x?xf32> {bufferization.writable = false},
%t2: tensor<?xf32> {bufferization.writable = false},
%t3: tensor<?x?xf32> {bufferization.writable = false},
%s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK: memref.copy %[[t1]], %[[alloc]]
@@ -54,9 +54,9 @@ func.func @linalg_op_bufferizes_out_of_place_with_input(
// CHECK-LABEL: func @linalg_op_output_cannot_alias_with_input
// CHECK-SAME: %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
func.func @linalg_op_output_cannot_alias_with_input(
%t1: tensor<?x?xf32> {linalg.inplaceable = true},
%t2: tensor<?xf32> {linalg.inplaceable = false},
%t3: tensor<?x?xf32> {linalg.inplaceable = true},
%t1: tensor<?x?xf32> {bufferization.writable = true},
%t2: tensor<?xf32> {bufferization.writable = false},
%t3: tensor<?x?xf32> {bufferization.writable = true},
%s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
// CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t3]] : {{.*}})
%r = linalg.generic {


@@ -16,9 +16,9 @@
// CHECK-LABEL: func @linalg_op_same_out_tensors(
func.func @linalg_op_same_out_tensors(
%t1: tensor<?xf32> {linalg.inplaceable = true},
%t1: tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read-write"
%t2: tensor<?xf32> {linalg.inplaceable = true})
%t2: tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "write"
-> (tensor<?xf32>, tensor<?xf32>){
@@ -54,9 +54,9 @@ func.func @linalg_op_same_out_tensors(
// CHECK-LABEL: func @linalg_op_same_out_tensors_2(
func.func @linalg_op_same_out_tensors_2(
%t1: tensor<?xf32> {linalg.inplaceable = true},
%t1: tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read-write"
%t2: tensor<?xf32> {linalg.inplaceable = true})
%t2: tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "write"
-> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>){


@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
// CHECK-LABEL: func @buffer_forwarding_conflict
func.func @buffer_forwarding_conflict(%arg0: tensor<?xf32> {linalg.inplaceable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
func.func @buffer_forwarding_conflict(%arg0: tensor<?xf32> {bufferization.writable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
%cst = arith.constant 0.000000e+00 : f32
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"]
@@ -34,7 +34,7 @@ func.func @buffer_forwarding_conflict(%arg0: tensor<?xf32> {linalg.inplaceable =
// -----
// CHECK-LABEL: func @buffer_forwarding_no_conflict
func.func @buffer_forwarding_no_conflict(%arg0: tensor<?xf32> {linalg.inplaceable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
func.func @buffer_forwarding_no_conflict(%arg0: tensor<?xf32> {bufferization.writable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
%cst = arith.constant 0.000000e+00 : f32
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]


@@ -6,7 +6,7 @@
// CHECK-SAME: %[[FUNC_ARG:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[sz:[0-9a-zA-Z]*]]: index
func.func @buffer_forwarding_conflict(
%t: tensor<?xf32> {linalg.buffer_layout = affine_map<(d0) -> (d0)>, linalg.inplaceable = true},
%t: tensor<?xf32> {bufferization.buffer_layout = affine_map<(d0) -> (d0)>, bufferization.writable = true},
%sz: index)
-> (tensor<?xf32>, tensor<?xf32>)
{
@@ -43,7 +43,7 @@ func.func @buffer_forwarding_conflict(
// CHECK-SAME: %[[FUNC_ARG:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[sz:[0-9a-zA-Z]*]]: index
func.func @buffer_forwarding_no_conflict(
%t: tensor<?xf32> {linalg.buffer_layout = affine_map<(d0) -> (d0)>, linalg.inplaceable = true},
%t: tensor<?xf32> {bufferization.buffer_layout = affine_map<(d0) -> (d0)>, bufferization.writable = true},
%sz: index)
-> (tensor<?xf32>)
{


@@ -8,31 +8,13 @@
// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs fully-dynamic-layout-maps=0" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP
// CHECK-LABEL: func @transfer_read(%{{.*}}: memref<?xf32, #map>) -> vector<4xf32> {
// CHECK-NO-LAYOUT-MAP-LABEL: func @transfer_read(%{{.*}}: memref<?xf32>) -> vector<4xf32>
func.func @transfer_read(
%A : tensor<?xf32> {linalg.inplaceable = false})
-> (vector<4xf32>)
{
%c0 = arith.constant 0 : index
%f0 = arith.constant 0.0 : f32
// CHECK: %[[RES:.*]] = vector.transfer_read {{.*}} : memref<?xf32, #{{.*}}>, vector<4xf32>
%0 = vector.transfer_read %A[%c0], %f0 : tensor<?xf32>, vector<4xf32>
// CHECK: return %[[RES]] : vector<4xf32>
return %0 : vector<4xf32>
}
// -----
// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
// CHECK-LABEL: func @fill_inplace(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
// CHECK-NO-LAYOUT-MAP-LABEL: func @fill_inplace(%{{.*}}: memref<?xf32>) {
func.func @fill_inplace(
%A : tensor<?xf32> {linalg.inplaceable = true})
%A : tensor<?xf32> {bufferization.writable = true})
-> tensor<?xf32>
{
// CHECK: %[[F0:.*]] = arith.constant 0.000000e+00 : f32
@@ -51,7 +33,7 @@ func.func @fill_inplace(
// -----
// CHECK-LABEL: func @tensor_extract(%{{.*}}: memref<?xf32, #{{.*}}>) -> f32 {
func.func @tensor_extract(%A : tensor<?xf32> {linalg.inplaceable = false}) -> (f32) {
func.func @tensor_extract(%A : tensor<?xf32> {bufferization.writable = false}) -> (f32) {
%c0 = arith.constant 0 : index
// CHECK: %[[RES:.*]] = memref.load {{.*}} : memref<?xf32, #{{.*}}>
@@ -65,12 +47,12 @@ func.func @tensor_extract(%A : tensor<?xf32> {linalg.inplaceable = false}) -> (f
// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
/// No linalg.inplaceable flag, must allocate.
/// No bufferization.writable flag, must allocate.
// CHECK-LABEL: func @not_inplace(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>) -> memref<?xf32> {
// CHECK-NO-LAYOUT-MAP-LABEL: func @not_inplace(%{{.*}}: memref<?xf32>) -> memref<?xf32>
func.func @not_inplace(
%A : tensor<?xf32> {linalg.inplaceable = false})
%A : tensor<?xf32> {bufferization.writable = false})
-> tensor<?xf32>
{
// CHECK: %[[F0:.*]] = arith.constant 0.000000e+00 : f32
@@ -94,7 +76,7 @@ func.func @not_inplace(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?x?xf32, #[[$map_2d_dyn]]>) {
// CHECK-NO-LAYOUT-MAP-LABEL: func @not_inplace(%{{.*}}: memref<?x?xf32>) {
func.func @not_inplace(
%A : tensor<?x?xf32> {linalg.inplaceable = true})
%A : tensor<?x?xf32> {bufferization.writable = true})
-> tensor<?x?xf32>
{
%f0 = arith.constant 0.0 : f32
@@ -120,7 +102,8 @@ func.func @not_inplace(
// -----
// CHECK-LABEL: func @not_inplace
func.func @not_inplace(%A : tensor<?x?xf32> {linalg.inplaceable = true}) -> tensor<?x?xf32> {
func.func @not_inplace(
%A : tensor<?x?xf32> {bufferization.writable = true}) -> tensor<?x?xf32> {
/// Within op multiple uses of %A, must alloc.
// CHECK: alloc
%r = linalg.matmul ins(%A, %A: tensor<?x?xf32>, tensor<?x?xf32>)
@@ -132,8 +115,9 @@ func.func @not_inplace(%A : tensor<?x?xf32> {linalg.inplaceable = true}) -> tens
// -----
// CHECK-LABEL: func @vec_inplace
func.func @vec_inplace(%A : tensor<?xf32> {linalg.inplaceable = true}, %vec : vector<4xf32>)
-> tensor<?xf32>
func.func @vec_inplace(
%A : tensor<?xf32> {bufferization.writable = true}, %vec : vector<4xf32>)
-> tensor<?xf32>
{
%c0 = arith.constant 0 : index
@@ -151,8 +135,9 @@ func.func @vec_inplace(%A : tensor<?xf32> {linalg.inplaceable = true}, %vec : ve
// CHECK-LABEL: func @vec_not_inplace
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
func.func @vec_not_inplace(%A : tensor<?xf32> {linalg.inplaceable = true}, %vec : vector<4xf32>)
-> (tensor<?xf32>, tensor<?xf32>)
func.func @vec_not_inplace(
%A : tensor<?xf32> {bufferization.writable = true}, %vec : vector<4xf32>)
-> (tensor<?xf32>, tensor<?xf32>)
{
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -182,10 +167,11 @@ func.func @vec_not_inplace(%A : tensor<?xf32> {linalg.inplaceable = true}, %vec
// CHECK-SAME: %[[A1:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>,
// CHECK-SAME: %[[t0:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>,
// CHECK-SAME: %[[t1:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
func.func @insert_slice_fun(%A0 : tensor<?xf32> {linalg.inplaceable = false},
%A1 : tensor<?xf32> {linalg.inplaceable = true},
%t0 : tensor<4xf32> {linalg.inplaceable = false},
%t1 : tensor<4xf32> {linalg.inplaceable = true})
func.func @insert_slice_fun(
%A0 : tensor<?xf32> {bufferization.writable = false},
%A1 : tensor<?xf32> {bufferization.writable = true},
%t0 : tensor<4xf32> {bufferization.writable = false},
%t1 : tensor<4xf32> {bufferization.writable = true})
-> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
{
// Hoisted allocs.
@@ -230,8 +216,8 @@ func.func @insert_slice_fun(%A0 : tensor<?xf32> {linalg.inplaceable = false},
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
func.func @insert_slice_fun(
%A : tensor<?xf32> {linalg.inplaceable = true},
%t : tensor<4xf32> {linalg.inplaceable = false})
%A : tensor<?xf32> {bufferization.writable = true},
%t : tensor<4xf32> {bufferization.writable = false})
-> tensor<?xf32>
{
%f0 = arith.constant 0.0 : f32
@@ -258,8 +244,8 @@ func.func @insert_slice_fun(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
func.func @insert_slice_fun(
%A : tensor<?xf32> {linalg.inplaceable = true},
%t : tensor<4xf32> {linalg.inplaceable = false})
%A : tensor<?xf32> {bufferization.writable = true},
%t : tensor<4xf32> {bufferization.writable = false})
-> tensor<?xf32>
{
%f0 = arith.constant 0.0 : f32
@@ -286,8 +272,8 @@ func.func @insert_slice_fun(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
func.func @insert_slice_fun_not_inplace(
%A : tensor<?xf32> {linalg.inplaceable = false},
%t : tensor<4xf32> {linalg.inplaceable = false})
%A : tensor<?xf32> {bufferization.writable = false},
%t : tensor<4xf32> {bufferization.writable = false})
-> tensor<?xf32>
{
// CHECK: %[[ALLOC:.*]] = memref.alloc(%{{.*}}) {alignment = 128 : i64} : memref<?xf32>
@@ -312,9 +298,10 @@ func.func @insert_slice_fun_not_inplace(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>,
// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
// CHECK-SAME: ) -> memref<?xf32> {
func.func @scf_for_yield_only(%A : tensor<?xf32> {linalg.inplaceable = false},
%B : tensor<?xf32> {linalg.inplaceable = true},
%lb : index, %ub : index, %step : index)
func.func @scf_for_yield_only(
%A : tensor<?xf32> {bufferization.writable = false},
%B : tensor<?xf32> {bufferization.writable = true},
%lb : index, %ub : index, %step : index)
-> (tensor<?xf32>, tensor<?xf32>)
{
// CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc
@@ -342,8 +329,8 @@ func.func @scf_for_yield_only(%A : tensor<?xf32> {linalg.inplaceable = false},
// just want to make sure that it does not crash.
// CHECK-LABEL: func @nested_scf_for
func.func @nested_scf_for(%A : tensor<?xf32> {linalg.inplaceable = true},
%v : vector<5xf32>) -> tensor<?xf32> {
func.func @nested_scf_for(%A : tensor<?xf32> {bufferization.writable = true},
%v : vector<5xf32>) -> tensor<?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
@@ -366,10 +353,10 @@ func.func @nested_scf_for(%A : tensor<?xf32> {linalg.inplaceable = true},
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
func.func @scf_for_with_tensor.insert_slice(
%A : tensor<?xf32> {linalg.inplaceable = false},
%B : tensor<?xf32> {linalg.inplaceable = true},
%C : tensor<4xf32> {linalg.inplaceable = false},
%lb : index, %ub : index, %step : index)
%A : tensor<?xf32> {bufferization.writable = false},
%B : tensor<?xf32> {bufferization.writable = true},
%C : tensor<4xf32> {bufferization.writable = false},
%lb : index, %ub : index, %step : index)
-> (tensor<?xf32>, tensor<?xf32>)
{
// CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc
@@ -407,8 +394,9 @@ func.func @scf_for_with_tensor.insert_slice(
// CHECK-LABEL: func @execute_region_with_conflict(
// CHECK-SAME: %[[m1:.*]]: memref<?xf32
func.func @execute_region_with_conflict(%t1 : tensor<?xf32> {linalg.inplaceable = "true"})
-> (f32, tensor<?xf32>, f32)
func.func @execute_region_with_conflict(
%t1 : tensor<?xf32> {bufferization.writable = "true"})
-> (f32, tensor<?xf32>, f32)
{
%f1 = arith.constant 0.0 : f32
%idx = arith.constant 7 : index
@@ -439,10 +427,10 @@ func.func @execute_region_with_conflict(%t1 : tensor<?xf32> {linalg.inplaceable
// CHECK-SAME: %[[B:[0-9a-zA-Z]*]]: memref<256x192xf32>
// CHECK-SAME: %[[C:[0-9a-zA-Z]*]]: memref<128x192xf32>
func.func @matmul(
%A: tensor<128x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%B: tensor<256x192xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%C: tensor<128x192xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<128x192xf32> {
%A: tensor<128x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
%B: tensor<256x192xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
%C: tensor<128x192xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
-> tensor<128x192xf32> {
%c0 = arith.constant 0 : index
%c256 = arith.constant 256 : index
%c32 = arith.constant 32 : index
@@ -513,8 +501,8 @@ func.func @matmul(
// CHECK: %[[subview:.*]] = memref.subview %[[A]][{{.*}}] [4] [1] : {{.*}} to memref<4xf32
// CHECK: memref.copy %[[alloc]], %[[subview]]
func.func @tensor_cast_not_in_place(
%A : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32> {linalg.inplaceable = false}, %idx: index)
%A : tensor<?xf32> {bufferization.writable = true},
%B : tensor<?xf32> {bufferization.writable = false}, %idx: index)
-> (tensor<?xf32>)
{
%r0 = tensor.cast %A : tensor<?xf32> to tensor<4xf32>
@@ -533,7 +521,7 @@ func.func @tensor_cast_not_in_place(
// CHECK-LABEL: func @dominance_violation_bug_1
func.func @dominance_violation_bug_1(
%A : tensor<?x?xf32> {linalg.inplaceable = false},
%A : tensor<?x?xf32> {bufferization.writable = false},
%idx : index)
-> tensor<?x?xf32>
{
@@ -553,8 +541,8 @@ func.func @dominance_violation_bug_1(
// CHECK-LABEL: func @scf_if_inplace(
// CHECK-SAME: %[[cond:.*]]: i1, %[[t1:.*]]: memref<?xf32{{.*}}>, %[[v:.*]]: vector
func.func @scf_if_inplace(%cond: i1,
%t1: tensor<?xf32> {linalg.inplaceable = true},
%v: vector<5xf32>, %idx: index) -> tensor<?xf32> {
%t1: tensor<?xf32> {bufferization.writable = true},
%v: vector<5xf32>, %idx: index) -> tensor<?xf32> {
// CHECK: scf.if %[[cond]] {
// CHECK-NEXT: } else {
@@ -582,9 +570,12 @@ func.func @scf_if_inplace(%cond: i1,
// CHECK: vector.transfer_write
// CHECK: }
// CHECK: }
func.func @scf_if_inside_scf_for(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1) -> tensor<?xf32> {
func.func @scf_if_inside_scf_for(
%t1: tensor<?xf32> {bufferization.writable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1)
-> tensor<?xf32>
{
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
@@ -606,8 +597,8 @@ func.func @scf_if_inside_scf_for(%t1: tensor<?xf32> {linalg.inplaceable = true},
// CHECK-SAME: %[[cond:.*]]: i1, %[[A:.*]]: memref<{{.*}}>, %[[B:.*]]: memref<{{.*}}>) -> memref<{{.*}}>
func.func @scf_if_non_equiv_yields(
%b : i1,
%A : tensor<4xf32> {linalg.inplaceable = false},
%B : tensor<4xf32> {linalg.inplaceable = false})
%A : tensor<4xf32> {bufferization.writable = false},
%B : tensor<4xf32> {bufferization.writable = false})
-> tensor<4xf32>
{
// CHECK: %[[r:.*]] = arith.select %[[cond]], %[[A]], %[[B]]
@@ -624,8 +615,8 @@ func.func @scf_if_non_equiv_yields(
// CHECK-LABEL: func @insert_op
// CHECK-SAME: %[[t1:.*]]: memref<?xf32, {{.*}}>, %[[s:.*]]: f32, %[[i:.*]]: index
func.func @insert_op(%t1 : tensor<?xf32> {linalg.inplaceable = true},
%s : f32, %i : index) -> tensor<?xf32> {
func.func @insert_op(%t1 : tensor<?xf32> {bufferization.writable = true},
%s : f32, %i : index) -> tensor<?xf32> {
// CHECK: memref.store %[[s]], %[[t1]][%[[i]]]
%0 = tensor.insert %s into %t1[%i] : tensor<?xf32>
// CHECK: return
@@ -635,9 +626,11 @@ func.func @insert_op(%t1 : tensor<?xf32> {linalg.inplaceable = true},
// -----
func.func @gather_like(
%arg0 : tensor<?x?xf32> {linalg.inplaceable = false},
%arg1 : tensor<?xi32> {linalg.inplaceable = false},
%arg2 : tensor<?x?xf32> {linalg.inplaceable = true}) -> tensor<?x?xf32> {
%arg0 : tensor<?x?xf32> {bufferization.writable = false},
%arg1 : tensor<?xi32> {bufferization.writable = false},
%arg2 : tensor<?x?xf32> {bufferization.writable = true})
-> tensor<?x?xf32>
{
%0 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0)>,
affine_map<(d0, d1) -> (d0, d1)>],
@@ -667,10 +660,12 @@ func.func @gather_like(
// CHECK-LABEL: func @linalg_op_bufferizes_inplace_with_input
// CHECK-SAME: %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
func.func @linalg_op_bufferizes_inplace_with_input(
%t1: tensor<?x?xf32> {linalg.inplaceable = true},
%t2: tensor<?xf32> {linalg.inplaceable = true},
%t3: tensor<?x?xf32> {linalg.inplaceable = true},
%s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
%t1: tensor<?x?xf32> {bufferization.writable = true},
%t2: tensor<?xf32> {bufferization.writable = true},
%t3: tensor<?x?xf32> {bufferization.writable = true},
%s1: index, %s2: index, %cst: f32)
-> tensor<?x?xf32>
{
// CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t3]] : {{.*}})
%r = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
@@ -699,7 +694,7 @@ func.func @linalg_op_bufferizes_inplace_with_input(
// CHECK-LABEL: func @op_is_reading_but_following_ops_are_not
// CHECK-SAME: %[[t0:.*]]: memref<?xf32
func.func @op_is_reading_but_following_ops_are_not(
%t0 : tensor<?xf32> {linalg.inplaceable = false},
%t0 : tensor<?xf32> {bufferization.writable = false},
%cst : f32)
-> tensor<?xf32>
{
@@ -751,8 +746,8 @@ func.func @depthwise_conv_1d_nwc_wc(%arg0: index, %arg1: index, %arg2: tensor<8x
// CHECK-LABEL: func @write_to_select_op_source
// CHECK-SAME: %[[t1:.*]]: memref<?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>
func.func @write_to_select_op_source(
%t1 : tensor<?xf32> {linalg.inplaceable = true},
%t2 : tensor<?xf32> {linalg.inplaceable = true},
%t1 : tensor<?xf32> {bufferization.writable = true},
%t2 : tensor<?xf32> {bufferization.writable = true},
%c : i1)
-> (tensor<?xf32>, tensor<?xf32>)
{
@@ -773,8 +768,8 @@ func.func @write_to_select_op_source(
// CHECK-LABEL: func @write_after_select_read_one
// CHECK-SAME: %[[t1:.*]]: memref<?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>
func.func @write_after_select_read_one(
%t1 : tensor<?xf32> {linalg.inplaceable = true},
%t2 : tensor<?xf32> {linalg.inplaceable = true},
%t1 : tensor<?xf32> {bufferization.writable = true},
%t2 : tensor<?xf32> {bufferization.writable = true},
%c : i1)
-> (f32, tensor<?xf32>)
{
@@ -910,9 +905,8 @@ func.func @scf_for_yield_allocation(%t: tensor<?xf32>, %lb : index, %ub : index,
// CHECK-LABEL: func @scf_for_swapping_yields(
// CHECK-SAME: %[[A:.*]]: memref<?xf32, #{{.*}}>, %[[B:.*]]: memref<?xf32, #{{.*}}>
func.func @scf_for_swapping_yields(
%A : tensor<?xf32>, %B : tensor<?xf32> {linalg.inplaceable = true},
%A : tensor<?xf32>, %B : tensor<?xf32> {bufferization.writable = true},
%C : tensor<4xf32>, %lb : index, %ub : index, %step : index)
-> (f32, f32)
{


@@ -7259,7 +7259,6 @@ cc_library(
":LinalgStructuredOpsIncGen",
":MathDialect",
":MemRefDialect",
":ModuleBufferization",
":Pass",
":SCFDialect",
":SCFTransforms",
@@ -7281,25 +7280,6 @@ cc_library(
],
)
cc_library(
name = "ModuleBufferization",
srcs = [
"lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp",
],
hdrs = [
"include/mlir/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.h",
],
includes = ["include"],
deps = [
":BufferizationDialect",
":BufferizationTransforms",
":FuncDialect",
":IR",
":MemRefDialect",
"//llvm:Support",
],
)
cc_library(
name = "TilingInterface",
srcs = ["lib/Interfaces/TilingInterface.cpp"],