[mlir] Remove AppendToArgumentsList functionality from BufferizeTypeConverter.

This functionality is superseded by the BufferResultsToOutParams pass (see https://reviews.llvm.org/D90071) for users that require buffers to be out-params. That pass should be run immediately after all tensors are gone from the program (before buffer optimizations and deallocation insertion), such as immediately after a "finalizing" bufferize pass. The -test-finalizing-bufferize pass now defaults to what used to be the `allowMemrefFunctionResults=true` flag, and the finalizing-bufferize-allowed-memref-results.mlir file is moved to test/Transforms/finalizing-bufferize.mlir. Differential Revision: https://reviews.llvm.org/D90778
2020-11-04 10:57:29 -08:00
parent f347d78cca
commit f7bc568266
6 changed files with 119 additions and 680 deletions
--- a/mlir/include/mlir/Transforms/Bufferize.h
+++ b/mlir/include/mlir/Transforms/Bufferize.h
@@ -44,12 +44,6 @@ namespace mlir {
 /// except for the ranked-tensor types which is converted to memref types.
 class BufferizeTypeConverter : public TypeConverter {
 public:
-  /// This enum is for showing how buffer placement operation converters should
-  /// conduct with certain result type after type conversion. This value can be
-  /// set/get for each specific type using setResultConversionKind or
-  /// getResultConversionKind.
-  enum ResultConversionKind { AppendToArgumentsList, KeepAsFunctionResult };
-
  BufferizeTypeConverter();

  /// This method tries to decompose a value of a certain type using provided
@@ -82,26 +76,6 @@ public:
    addConversion(std::forward<FnT>(callback));
  }

-  /// This method returns ResultConversionKind for the mapping from `origin`
-  /// type to `input` type.
-  ResultConversionKind getResultConversionKind(Type origin, Type input);
-
-  /// This method registers ResultConversionKind for the mapping from type 'T'
-  /// to type 'U'.
-  template <typename T, typename U>
-  void setResultConversionKind(ResultConversionKind kind) {
-    assert((kind != AppendToArgumentsList ||
-            llvm::is_one_of<U, MemRefType, UnrankedMemRefType>::value) &&
-           "Only the memref typed values can be set to be appended to the "
-           "function argument list at the moment");
-    resultTypeConversions.emplace_back(
-        [=](Type origin, Type input) -> Optional<ResultConversionKind> {
-          if (origin.template isa<T>() && input.template isa<U>())
-            return kind;
-          return llvm::None;
-        });
-  }
-
 private:
  using DecomposeValueConversionCallFn = std::function<Optional<LogicalResult>(
      OpBuilder &, Location, Type, Value, SmallVectorImpl<Value> &)>;
@@ -109,9 +83,6 @@ private:
  using DecomposeTypeConversionCallFn =
      std::function<Optional<LogicalResult>(Type, SmallVectorImpl<Type> &)>;

-  using ResultConversionKindFn =
-      std::function<Optional<ResultConversionKind>(Type, Type)>;
-
  /// Generate a wrapper for the given decompose value conversion callback.
  template <typename T, typename FnT>
  DecomposeValueConversionCallFn
@@ -139,7 +110,6 @@ private:
    };
  }

-  SmallVector<ResultConversionKindFn, 2> resultTypeConversions;
  SmallVector<DecomposeValueConversionCallFn, 2> decomposeValueConversions;
  SmallVector<DecomposeTypeConversionCallFn, 2> decomposeTypeConversions;
 };
@@ -221,48 +191,10 @@ public:
  LogicalResult
  matchAndRewrite(ReturnOpSourceTy returnOp, ArrayRef<Value> operands,
                  ConversionPatternRewriter &rewriter) const final {
-    Location loc = returnOp.getLoc();
-
-    // Split the operands depending on whether they need a copy operation or
-    // they remain as operands of the return operation. If an operand is
-    // decomposable and a decompose callback function has been provided by the
-    // user, it will be unpacked.
-    SmallVector<Value, 2> newOperands, needCopyOperands;
-    OpBuilder builder(returnOp);
-    for (auto operand : llvm::enumerate(operands)) {
-      SmallVector<Value, 2> values;
-      this->converter.tryDecomposeValue(builder, loc, operand.value().getType(),
-                                        operand.value(), values);
-      Type type = returnOp.getOperand(operand.index()).getType();
-      SmallVector<Type, 2> originTypes;
-      this->converter.tryDecomposeType(type, originTypes);
-      for (auto value : llvm::enumerate(values)) {
-        Type origin = originTypes[value.index()];
-        Type converted = value.value().getType();
-        auto kind = this->converter.getResultConversionKind(origin, converted);
-        if (kind == BufferizeTypeConverter::KeepAsFunctionResult)
-          newOperands.push_back(value.value());
-        else
-          // kind = BufferizeTypeConverter::AppendToArgumentsList
-          needCopyOperands.push_back(value.value());
-      }
-    }
-
-    // Insert Copy operations instead for the operands that have been removed
-    // from operand list and appended to the function arguments list.
-    Block &entryBlock = returnOp.getParentRegion()->front();
-    unsigned numFuncArgs = entryBlock.getNumArguments();
-    if (needCopyOperands.size() > numFuncArgs)
-      return returnOp.emitError(
-          "The number of operands that need Copy operations is more "
-          "than the number of target function arguments.");
-    unsigned destArgNum = numFuncArgs - needCopyOperands.size();
-    rewriter.setInsertionPoint(returnOp);
-    for (Value operand : needCopyOperands) {
-      rewriter.create<CopyOpTy>(loc, operand,
-                                entryBlock.getArgument(destArgNum));
-      ++destArgNum;
-    }
+    SmallVector<Value, 2> newOperands;
+    for (auto operand : operands)
+      this->converter.tryDecomposeValue(
+          rewriter, returnOp.getLoc(), operand.getType(), operand, newOperands);
    rewriter.replaceOpWithNewOp<ReturnOpTargetTy>(returnOp, newOperands);
    return success();
  }
--- a/mlir/lib/Transforms/Bufferize.cpp
+++ b/mlir/lib/Transforms/Bufferize.cpp
@@ -63,15 +63,6 @@ void BufferizeTypeConverter::tryDecomposeType(Type type,
  types.push_back(type);
 }

-/// This method returns ResultConversionKind for the input type.
-BufferizeTypeConverter::ResultConversionKind
-BufferizeTypeConverter::getResultConversionKind(Type origin, Type converted) {
-  for (auto &conversion : resultTypeConversions)
-    if (auto res = conversion(origin, converted))
-      return res.getValue();
-  return KeepAsFunctionResult;
-}
-
 void mlir::populateBufferizeMaterializationLegality(ConversionTarget &target) {
  target.addLegalOp<TensorLoadOp, TensorToMemrefOp>();
 }
@@ -140,16 +131,8 @@ LogicalResult BufferizeFuncOpConverter::matchAndRewrite(
  for (Type resultType : funcType.getResults()) {
    SmallVector<Type, 2> originTypes;
    converter.tryDecomposeType(resultType, originTypes);
-    for (auto origin : originTypes) {
-      Type converted = converter.convertType(origin);
-      auto kind = converter.getResultConversionKind(origin, converted);
-      if (kind == BufferizeTypeConverter::AppendToArgumentsList) {
-        conversion.addInputs(converted);
-      } else {
-        assert(kind == BufferizeTypeConverter::KeepAsFunctionResult);
-        newResultTypes.push_back(converted);
-      }
-    }
+    for (auto origin : originTypes)
+      newResultTypes.push_back(converter.convertType(origin));
  }

  if (failed(rewriter.convertRegionTypes(&funcOp.getBody(), converter,
@@ -168,66 +151,12 @@ LogicalResult BufferizeFuncOpConverter::matchAndRewrite(
 // BufferizeCallOpConverter
 //===----------------------------------------------------------------------===//

-namespace {
-// This class represents a mapping from a result to a list of values and some
-// results that have not yet constructed. Instead, the indices of these
-// results in the operation that will be constructed are known. They will be
-// replaced with the actual values when they are available. The order of
-// adding to this mapping is important.
-class CallOpResultMapping {
-public:
-  CallOpResultMapping() { order = 0; };
-
-  /// Add an available value to the mapping.
-  void addMapping(Value value) { toValuesMapping.push_back({order++, value}); }
-
-  /// Add the index of unavailble result value to the mapping.
-  void addMapping(unsigned index) {
-    toIndicesMapping.push_back({order++, index});
-  }
-
-  /// This method returns the mapping values list. The unknown result values
-  /// that only their indices are available are replaced with their values.
-  void getMappingValues(ValueRange valuesToReplaceIndices,
-                        SmallVectorImpl<Value> &values) {
-    // Append available values to the list.
-    SmallVector<std::pair<unsigned, Value>, 2> res(toValuesMapping.begin(),
-                                                   toValuesMapping.end());
-    // Replace the indices with the actual values.
-    for (const std::pair<unsigned, unsigned> &entry : toIndicesMapping) {
-      assert(entry.second < valuesToReplaceIndices.size() &&
-             "The value index is out of range.");
-      res.push_back({entry.first, valuesToReplaceIndices[entry.second]});
-    }
-    // Sort the values based on their adding orders.
-    llvm::sort(res, [](const std::pair<unsigned, Value> &v1,
-                       const std::pair<unsigned, Value> &v2) {
-      return v1.first < v2.first;
-    });
-    // Fill the values.
-    for (const std::pair<unsigned, Value> &entry : res)
-      values.push_back(entry.second);
-  }
-
-private:
-  /// Keeping the inserting order of mapping values.
-  int order;
-
-  /// Containing the mapping values with their inserting orders.
-  SmallVector<std::pair<unsigned, Value>, 2> toValuesMapping;
-
-  /// Containing the indices of result values with their inserting orders.
-  SmallVector<std::pair<unsigned, unsigned>, 2> toIndicesMapping;
-};
-} // namespace
-
 /// Performs the actual rewriting step.
 LogicalResult BufferizeCallOpConverter::matchAndRewrite(
    CallOp callOp, ArrayRef<Value> operands,
    ConversionPatternRewriter &rewriter) const {

  Location loc = callOp.getLoc();
-  OpBuilder builder(callOp);
  SmallVector<Value, 2> newOperands;

  // TODO: if the CallOp references a FuncOp that only has a declaration (e.g.
@@ -237,39 +166,25 @@ LogicalResult BufferizeCallOpConverter::matchAndRewrite(

  // Create the operands list of the new `CallOp`. It unpacks the decomposable
  // values if a decompose callback function has been provided by the user.
-  for (auto operand : operands) {
-    SmallVector<Value, 2> values;
-    converter.tryDecomposeValue(builder, loc, operand.getType(), operand,
-                                values);
-    newOperands.append(values.begin(), values.end());
-  }
+  for (auto operand : operands)
+    converter.tryDecomposeValue(rewriter, loc, operand.getType(), operand,
+                                newOperands);

-  // Create the new result types for the new `CallOp` and a mapping from the old
-  // result to new value(s).
+  // Create the new result types for the new `CallOp` and track the indices in
+  // the new call op's results that correspond to the old call op's results.
  SmallVector<Type, 2> newResultTypes;
-  SmallVector<CallOpResultMapping, 4> mappings;
-  mappings.resize(callOp.getNumResults());
+  SmallVector<SmallVector<int, 2>, 4> expandedResultIndices;
+  expandedResultIndices.resize(callOp.getNumResults());
  for (auto result : llvm::enumerate(callOp.getResults())) {
    SmallVector<Type, 2> originTypes;
    converter.tryDecomposeType(result.value().getType(), originTypes);
-    auto &resultMapping = mappings[result.index()];
+    auto &resultMapping = expandedResultIndices[result.index()];
    for (Type origin : originTypes) {
      Type converted = converter.convertType(origin);
-      auto kind = converter.getResultConversionKind(origin, converted);
-      if (kind == BufferizeTypeConverter::KeepAsFunctionResult) {
-        newResultTypes.push_back(converted);
-        // The result value is not yet available. Its index is kept and it is
-        // replaced with the actual value of the new `CallOp` later.
-        resultMapping.addMapping(newResultTypes.size() - 1);
-      } else {
-        // kind = BufferizeTypeConverter::AppendToArgumentsList
-        MemRefType memref = converted.dyn_cast<MemRefType>();
-        if (!memref)
-          return callOp.emitError("Cannot allocate for a non-Memref type");
-        Value alloc = rewriter.create<AllocOp>(loc, memref);
-        newOperands.push_back(alloc);
-        resultMapping.addMapping(alloc);
-      }
+      newResultTypes.push_back(converted);
+      // The result value is not yet available. Its index is kept and it is
+      // replaced with the actual value of the new `CallOp` later.
+      resultMapping.push_back(newResultTypes.size() - 1);
    }
  }

@@ -278,12 +193,12 @@ LogicalResult BufferizeCallOpConverter::matchAndRewrite(

  // Build a replacing value for each result to replace its uses. If a result
  // has multiple mapping values, it needs to be packed to a single value.
-  OpBuilder nextBuilder(callOp.getOperation()->getNextNode());
  SmallVector<Value, 2> replacedValues;
  replacedValues.reserve(callOp.getNumResults());
  for (unsigned i = 0, e = callOp.getNumResults(); i < e; ++i) {
-    SmallVector<Value, 2> valuesToPack;
-    mappings[i].getMappingValues(newCallOp.getResults(), valuesToPack);
+    auto valuesToPack = llvm::to_vector<6>(
+        llvm::map_range(expandedResultIndices[i],
+                        [&](int i) { return newCallOp.getResult(i); }));
    if (valuesToPack.empty()) {
      // No replacement is required.
      replacedValues.push_back(nullptr);
@@ -293,7 +208,7 @@ LogicalResult BufferizeCallOpConverter::matchAndRewrite(
      // Values need to be packed using callback function. The same callback
      // that is used for materializeArgumentConversion is used for packing.
      Value packed = converter.materializeArgumentConversion(
-          nextBuilder, loc, callOp.getType(i), valuesToPack);
+          rewriter, loc, callOp.getType(i), valuesToPack);
      replacedValues.push_back(packed);
    }
  }
--- a/mlir/test/Transforms/finalizing-bufferize-allowed-memref-results.mlir
+++ b/mlir/test/Transforms/finalizing-bufferize-allowed-memref-results.mlir
@@ -1,184 +0,0 @@
-// RUN: mlir-opt -test-finalizing-bufferize-with-allowed-memref-results -split-input-file %s | FileCheck %s
-
-// Since allowMemrefEscaping is active for Bufferization in this test pass,
-// all tensor typed function results are converted to memref and remain as
-// function results. All memref typed function results will escape from the
-// deallocation phase of Bufferization.
-
-// CHECK-LABEL: func @void_function_signature_conversion
-func @void_function_signature_conversion(%arg0: tensor<4x8xf32>) {
-    return
-}
-// CHECK: ({{.*}}: memref<4x8xf32>)
-
-// -----
-
-// CHECK-LABEL: func @complex_signature_conversion
-func @complex_signature_conversion(
-  %arg0: tensor<5xf32>,
-  %arg1: memref<10xf32>,
-  %arg2: i1, %arg3: f16) -> (
-    i1,
-    tensor<5xf32>,
-    memref<10xf32>,
-    memref<15xf32>,
-    f16) {
-  %0 = alloc() : memref<15xf32>
-  %1 = test.tensor_based in(%arg0 : tensor<5xf32>) -> tensor<5xf32>
-  return %arg2, %1, %arg1, %0, %arg3 :
-   i1, tensor<5xf32>, memref<10xf32>, memref<15xf32>, f16
-}
-//      CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>,
-// CHECK-SAME: %[[ARG2:.*]]: i1, %[[ARG3:.*]]: f16)
-// CHECK-SAME: (i1, memref<5xf32>, memref<10xf32>, memref<15xf32>, f16)
-//      CHECK: %[[FIRST_ALLOC:.*]] = alloc()
-//      CHECK: %[[TENSOR_ALLOC:.*]] = alloc()
-//      CHECK: return %[[ARG2]], %[[TENSOR_ALLOC]], %[[ARG1]], %[[FIRST_ALLOC]],
-// CHECK-SAME: %[[ARG3]]
-
-// -----
-
-// CHECK-LABEL: func @no_signature_conversion_is_needed
-func @no_signature_conversion_is_needed(%arg0: memref<4x8xf32>) {
-  return
-}
-// CHECK: ({{.*}}: memref<4x8xf32>)
-
-// -----
-
-// CHECK-LABEL: func @no_signature_conversion_is_needed
-func @no_signature_conversion_is_needed(%arg0: i1, %arg1: f16) -> (i1, f16){
-  return %arg0, %arg1 : i1, f16
-}
-// CHECK: (%[[ARG0:.*]]: i1, %[[ARG1:.*]]: f16) -> (i1, f16)
-// CHECK: return %[[ARG0]], %[[ARG1]]
-
-// -----
-
-// CHECK-LABEL: func @simple_signature_conversion
-func @simple_signature_conversion(%arg0: tensor<4x8xf32>) -> tensor<4x8xf32> {
-  return %arg0 : tensor<4x8xf32>
-}
-//      CHECK: (%[[ARG0:.*]]: [[TYPE:.*]]<[[RANK:.*]]>) -> [[TYPE]]<[[RANK]]>
-// CHECK-NEXT: return %[[ARG0]]
-
-// -----
-
-// CHECK-LABEL: func @func_with_unranked_arg_and_result
-func @func_with_unranked_arg_and_result(%arg0: tensor<*xf32>) -> tensor<*xf32> {
-  return %arg0 : tensor<*xf32>
-}
-// CHECK-SAME: ([[ARG:%.*]]: memref<*xf32>) -> memref<*xf32>
-// CHECK-NEXT: return [[ARG]] : memref<*xf32>
-
-// -----
-
-// CHECK-LABEL: func @func_and_block_signature_conversion
-func @func_and_block_signature_conversion(%arg0 : tensor<2xf32>, %cond : i1, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32>{
-    cond_br %cond, ^bb1, ^bb2
-  ^bb1:
-    br ^exit(%arg0 : tensor<2xf32>)
-  ^bb2:
-    br ^exit(%arg0 : tensor<2xf32>)
-  ^exit(%arg2: tensor<2xf32>):
-    return %arg1 : tensor<4x4xf32>
-}
-//      CHECK: (%[[ARG0:.*]]: [[ARG0_TYPE:.*]], %[[COND:.*]]: i1, %[[ARG1:.*]]: [[ARG1_TYPE:.*]]) -> [[RESULT_TYPE:.*]]
-//      CHECK: br ^[[EXIT_BLOCK:.*]](%[[ARG0]] : [[ARG0_TYPE]])
-//      CHECK: br ^[[EXIT_BLOCK]](%[[ARG0]] : [[ARG0_TYPE]])
-//      CHECK: ^[[EXIT_BLOCK]](%{{.*}}: [[ARG0_TYPE]])
-// CHECK-NEXT:  return %[[ARG1]]
-
-// -----
-
-// CHECK-LABEL: func @callee
-func @callee(%arg1: tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>) {
-  %buff = alloc() : memref<2xf32>
-  return %arg1, %buff : tensor<5xf32>, memref<2xf32>
-}
-// CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>) -> (memref<5xf32>, memref<2xf32>)
-// CHECK: %[[ALLOC:.*]] = alloc()
-// CHECK: return %[[CALLEE_ARG]], %[[ALLOC]]
-
-// CHECK-LABEL: func @caller
-func @caller(%arg0: tensor<5xf32>) -> tensor<5xf32> {
-  %x:2 = call @callee(%arg0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>)
-  %y:2 = call @callee(%x#0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>)
-  return %y#0 : tensor<5xf32>
-}
-// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>) -> memref<5xf32>
-// CHECK: %[[X:.*]]:2 = call @callee(%[[CALLER_ARG]])
-// CHECK: %[[Y:.*]]:2 = call @callee(%[[X]]#0)
-// CHECK: return %[[Y]]#0
-
-// -----
-
-// Test case: Testing BufferizeCallOpConverter to see if it matches with the
-// signature of the new signature of the callee function when there are tuple
-// typed args and results. BufferizeTypeConverter is set to flatten tuple typed
-// arguments. The tuple typed values should be decomposed and composed using
-// get_tuple_element and make_tuple operations of test dialect. Tensor types are
-// converted to Memref. Memref typed function results remain as function
-// results.
-
-// CHECK-LABEL: func @callee
-func @callee(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>){
-  return %arg0 : tuple<tensor<2xf32>,i1, tensor<5xf32>>
-}
-// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>)
-// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>)
-// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32}
-// CHECK-NEXT: return %[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]]
-
-// CHECK-LABEL: func @caller
-func @caller(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> tuple<tensor<2xf32>,i1, tensor<5xf32>>{
-  %x0 = call @callee(%arg0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
-  %y0 = call @callee(%x0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
-  return %y0 : tuple<tensor<2xf32>,i1, tensor<5xf32>>
-}
-// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>)
-// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>)
-// CHECK-NEXT: %[[ARG_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 2 : i32}
-// CHECK-NEXT: %[[CALLEE_RESULTS:.*]]:3 = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]])
-// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -> (memref<2xf32>, i1, memref<5xf32>)
-// CHECK-NEXT: %[[RESULT_TUPLE:.*]] = "test.make_tuple"(%[[CALLEE_RESULTS]]#0, %[[CALLEE_RESULTS]]#1, %[[CALLEE_RESULTS]]#2)
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 2 : i32}
-// CHECK-NEXT: %[[CALLEE_RESULTS:.*]]:3 = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]])
-// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -> (memref<2xf32>, i1, memref<5xf32>)
-// CHECK-NEXT: %[[RETURN_TUPLE:.*]] = "test.make_tuple"(%[[CALLEE_RESULTS]]#0, %[[CALLEE_RESULTS]]#1, %[[CALLEE_RESULTS]]#2)
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 2 : i32}
-// CHECK-NEXT: return %[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]]
-
-// -----
-
-// Test case: Testing BufferizeFuncOpConverter and
-// BufferizeReturnOpConverter to see if the return operation matches with the
-// new function signature when there are tuple typed args and results.
-// BufferizeTypeConverter is set to flatten tuple typed arguments. The tuple
-// typed values should be decomposed and composed using get_tuple_element and
-// make_tuple operations of test dialect. Tensor types are converted to Memref.
-// Memref typed function results remain as function results.
-
-// CHECK-LABEL: func @decompose_tuple_typed_function_args_and_results
-func @decompose_tuple_typed_function_args_and_results(%arg0: tuple<i1,f32>, %arg1: tensor<10xf32>, %arg2: tuple<i1, tensor<5xf32>>) -> (tuple<i1, tensor<5xf32>>, tensor<10xf32>, tuple<i1,f32>){
-  return %arg2, %arg1, %arg0 : tuple<i1, tensor<5xf32>>, tensor<10xf32>, tuple<i1,f32>
-}
-// CHECK-SAME: %[[ARG0:.*]]: i1, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref<10xf32>, %[[ARG3:.*]]: i1, %[[ARG4:.*]]: memref<5xf32>
-// CHECK-SAME: (i1, memref<5xf32>, memref<10xf32>, i1, f32)
-// CHECK-NEXT: %[[FIRST_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]])
-// CHECK-NEXT: %[[SECOND_TUPLE:.*]] = "test.make_tuple"(%[[ARG3]], %[[ARG4]])
-// CHECK-NEXT: %[[SECOND_TUPLE_FIRST_ELEM:.*]]  = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[FIRST_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[FIRST_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[SECOND_TUPLE_SECOND_ELEM]], %[[ARG2]], %[[FIRST_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_SECOND_ELEM]]
--- a/mlir/test/Transforms/finalizing-bufferize.mlir
+++ b/mlir/test/Transforms/finalizing-bufferize.mlir
@@ -1,31 +1,36 @@
 // RUN: mlir-opt -test-finalizing-bufferize -split-input-file %s | FileCheck %s

-// CHECK-LABEL: func @func_signature_conversion
-func @func_signature_conversion(%arg0: tensor<4x8xf32>) {
+// CHECK-LABEL: func @void_function_signature_conversion
+func @void_function_signature_conversion(%arg0: tensor<4x8xf32>) {
    return
 }
-// CHECK: ({{.*}}: memref<4x8xf32>) {
+// CHECK: ({{.*}}: memref<4x8xf32>)

 // -----

-// Only tensor typed function result should be converted to memref and move to
-// the function arguments list. The other memref function results remain as
-// function results.
-
-// CHECK-LABEL: func @memref_in_function_results
-func @memref_in_function_results(%arg0: tensor<5xf32>, %arg1: memref<10xf32>)
-                            -> (tensor<5xf32>, memref<10xf32>, memref<15xf32>) {
+// CHECK-LABEL: func @complex_signature_conversion
+func @complex_signature_conversion(
+  %arg0: tensor<5xf32>,
+  %arg1: memref<10xf32>,
+  %arg2: i1,
+  %arg3: f16) -> (
+    i1,
+    tensor<5xf32>,
+    memref<10xf32>,
+    memref<15xf32>,
+    f16) {
  %0 = alloc() : memref<15xf32>
  %1 = test.tensor_based in(%arg0 : tensor<5xf32>) -> tensor<5xf32>
-  return %1, %arg1, %0 : tensor<5xf32>, memref<10xf32>, memref<15xf32>
+  return %arg2, %1, %arg1, %0, %arg3 :
+   i1, tensor<5xf32>, memref<10xf32>, memref<15xf32>, f16
 }
 //      CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>,
-// CHECK-SAME: %[[RESULT:.*]]: memref<5xf32>)
-// CHECK-SAME: (memref<10xf32>, memref<15xf32>)
+// CHECK-SAME: %[[ARG2:.*]]: i1, %[[ARG3:.*]]: f16)
+// CHECK-SAME: (i1, memref<5xf32>, memref<10xf32>, memref<15xf32>, f16)
 //      CHECK: %[[FIRST_ALLOC:.*]] = alloc()
 //      CHECK: %[[TENSOR_ALLOC:.*]] = alloc()
-//      CHECK: test.copy(%[[TENSOR_ALLOC]], %[[RESULT]])
-//      CHECK: return %[[ARG1]], %[[FIRST_ALLOC]]
+//      CHECK: return %[[ARG2]], %[[TENSOR_ALLOC]], %[[ARG1]], %[[FIRST_ALLOC]],
+// CHECK-SAME: %[[ARG3]]

 // -----

@@ -33,7 +38,7 @@ func @memref_in_function_results(%arg0: tensor<5xf32>, %arg1: memref<10xf32>)
 func @no_signature_conversion_is_needed(%arg0: memref<4x8xf32>) {
  return
 }
-// CHECK: ({{.*}}: memref<4x8xf32>) {
+// CHECK: ({{.*}}: memref<4x8xf32>)

 // -----

@@ -46,39 +51,26 @@ func @no_signature_conversion_is_needed(%arg0: i1, %arg1: f16) -> (i1, f16){

 // -----

-// CHECK-LABEL: func @complex_signature_conversion
-func @complex_signature_conversion(%arg0: tensor<4x8xf32>, %arg1: i1,
-                                   %arg2: tensor<5x5xf64>,%arg3: f16) ->
-                                   (i1, tensor<5x5xf64>, f16, tensor<4x8xf32>) {
-    return %arg1, %arg2, %arg3, %arg0 : i1, tensor<5x5xf64>, f16,
-           tensor<4x8xf32>
+// CHECK-LABEL: func @simple_signature_conversion
+func @simple_signature_conversion(%arg0: tensor<4x8xf32>) -> tensor<4x8xf32> {
+  return %arg0 : tensor<4x8xf32>
 }
-//      CHECK: (%[[ARG0:.*]]: memref<4x8xf32>, %[[ARG1:.*]]: i1
-// CHECK-SAME: %[[ARG2:.*]]: memref<5x5xf64>, %[[ARG3:.*]]: f16
-// CHECK-SAME: %[[RESULT1:.*]]: memref<5x5xf64>
-// CHECK-SAME: %[[RESULT2:.*]]: memref<4x8xf32>) -> (i1, f16) {
-// CHECK-NEXT: test.copy(%[[ARG2]], %[[RESULT1]])
-// CHECK-NEXT: test.copy(%[[ARG0]], %[[RESULT2]])
-// CHECK-NEXT: return %[[ARG1]], %[[ARG3]]
+//      CHECK: (%[[ARG0:.*]]: [[TYPE:.*]]<[[RANK:.*]]>) -> [[TYPE]]<[[RANK]]>
+// CHECK-NEXT: return %[[ARG0]]

 // -----

-// CHECK-LABEL: func @non_void_to_void_return_op_converter
-func @non_void_to_void_return_op_converter(%arg0: tensor<4x8xf32>)
-                                           -> tensor<4x8xf32> {
-  return %arg0 : tensor<4x8xf32>
+// CHECK-LABEL: func @func_with_unranked_arg_and_result
+func @func_with_unranked_arg_and_result(%arg0: tensor<*xf32>) -> tensor<*xf32> {
+  return %arg0 : tensor<*xf32>
 }
-//      CHECK: (%[[ARG0:.*]]: [[TYPE:.*]]<[[RANK:.*]]>,
-// CHECK-SAME: %[[RESULT:.*]]: [[TYPE]]<[[RANK]]>) {
-// CHECK-NEXT: test.copy(%[[ARG0]], %[[RESULT]])
-// CHECK-NEXT: return
+// CHECK-SAME: ([[ARG:%.*]]: memref<*xf32>) -> memref<*xf32>
+// CHECK-NEXT: return [[ARG]] : memref<*xf32>

 // -----

 // CHECK-LABEL: func @func_and_block_signature_conversion
-func @func_and_block_signature_conversion(%arg0 : tensor<2xf32>, %cond : i1,
-                                          %arg1: tensor<4x4xf32>)
-                                          -> tensor<4x4xf32>{
+func @func_and_block_signature_conversion(%arg0 : tensor<2xf32>, %cond : i1, %arg1: tensor<4x4xf32>) -> tensor<4x4xf32>{
    cond_br %cond, ^bb1, ^bb2
  ^bb1:
    br ^exit(%arg0 : tensor<2xf32>)
@@ -87,293 +79,102 @@ func @func_and_block_signature_conversion(%arg0 : tensor<2xf32>, %cond : i1,
  ^exit(%arg2: tensor<2xf32>):
    return %arg1 : tensor<4x4xf32>
 }
-//      CHECK: (%[[ARG0:.*]]: [[ARG0_TYPE:.*]], %[[COND:.*]]: i1,
-// CHECK-SAME: %[[ARG1:.*]]: [[ARG1_TYPE:.*]],
-// CHECK-SAME: %[[RESULT:.*]]: [[RESULT_TYPE:.*]]) {
+//      CHECK: (%[[ARG0:.*]]: [[ARG0_TYPE:.*]], %[[COND:.*]]: i1, %[[ARG1:.*]]: [[ARG1_TYPE:.*]]) -> [[RESULT_TYPE:.*]] {
 //      CHECK: br ^[[EXIT_BLOCK:.*]](%[[ARG0]] : [[ARG0_TYPE]])
 //      CHECK: br ^[[EXIT_BLOCK]](%[[ARG0]] : [[ARG0_TYPE]])
 //      CHECK: ^[[EXIT_BLOCK]](%{{.*}}: [[ARG0_TYPE]])
-// CHECK-NEXT: test.copy(%[[ARG1]], %[[RESULT]])
-// CHECK-NEXT: return
+// CHECK-NEXT:  return %[[ARG1]] : [[RESULT_TYPE]]

 // -----

-// Test Case: Simple case for checking if BufferizePlacer creates AllocOps
-//            right before TensorBasedOp.
-
-// CHECK-LABEL: func @compute_allocs_position_simple
-func @compute_allocs_position_simple(%cond: i1, %arg0: tensor<2xf32>)
-                                     -> tensor<2xf32>{
-    %0 = test.tensor_based in(%arg0 : tensor<2xf32>) -> tensor<2xf32>
-    %1 = test.tensor_based in(%0 : tensor<2xf32>) -> tensor<2xf32>
-    return %1 : tensor<2xf32>
-}
-//      CHECK: (%{{.*}}: {{.*}}, %[[ARG0:.*]]: memref<2xf32>,
-// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[FIRST_ALLOC]]
-//      CHECK: %[[SECOND_ALLOC:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[FIRST_ALLOC]]{{.*}} out(%[[SECOND_ALLOC]]
-
-// -----
-
-// Test Case: if-else case for checking if BufferizePlacer creates AllocOps
-//            right before TensorBasedOp.
-
-// CHECK-LABEL: func @compute_allocs_position
-func @compute_allocs_position(%cond: i1, %arg0: tensor<2xf32>) -> tensor<2xf32>{
-    %0 = test.tensor_based in(%arg0 : tensor<2xf32>) -> tensor<2xf32>
-    %1 = test.tensor_based in(%0 : tensor<2xf32>) -> tensor<2xf32>
-    cond_br %cond, ^bb1(%arg0, %0: tensor<2xf32>, tensor<2xf32>),
-                   ^bb2(%0, %arg0: tensor<2xf32>, tensor<2xf32>)
-  ^bb1(%arg1 : tensor<2xf32>, %arg2 : tensor<2xf32>):
-    %2 = test.tensor_based in(%arg0 : tensor<2xf32>) -> tensor<2xf32>
-    %3 = test.tensor_based in(%2 : tensor<2xf32>) -> tensor<2xf32>
-    br ^exit(%arg1, %arg2 : tensor<2xf32>, tensor<2xf32>)
-  ^bb2(%arg3 : tensor<2xf32>, %arg4 : tensor<2xf32>):
-    %4 = test.tensor_based in(%arg0 : tensor<2xf32>) -> tensor<2xf32>
-    %5 = test.tensor_based in(%4 : tensor<2xf32>) -> tensor<2xf32>
-    br ^exit(%arg3, %arg4 : tensor<2xf32>, tensor<2xf32>)
-  ^exit(%arg5 : tensor<2xf32>, %arg6 : tensor<2xf32>):
-    %6 = test.tensor_based in(%arg0 : tensor<2xf32>)  -> tensor<2xf32>
-    %7 = test.tensor_based in(%6 : tensor<2xf32>) -> tensor<2xf32>
-    return %7 : tensor<2xf32>
-}
-//      CHECK: (%{{.*}}: {{.*}}, %[[ARG0:.*]]: memref<2xf32>,
-// CHECK-NEXT: %[[ALLOC0:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[ALLOC0]]
-//      CHECK: %[[ALLOC1:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ALLOC0]]{{.*}} out(%[[ALLOC1]]
-//      CHECK: cond_br %{{.*}}, ^[[BB0:.*]]({{.*}}), ^[[BB1:.*]](
-// CHECK-NEXT: ^[[BB0]]
-// CHECK-NEXT: %[[ALLOC2:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[ALLOC2]]
-//      CHECK: %[[ALLOC3:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ALLOC2]]{{.*}} out(%[[ALLOC3]]
-//      CHECK: br ^[[EXIT:.*]]({{.*}})
-// CHECK-NEXT: ^[[BB1]]
-// CHECK-NEXT: %[[ALLOC4:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[ALLOC4]]
-//      CHECK: %[[ALLOC5:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ALLOC4]]{{.*}} out(%[[ALLOC5]]
-//      CHECK: br ^[[EXIT]]
-// CHECK-NEXT: ^[[EXIT]]
-// CHECK-NEXT: %[[ALLOC6:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[ALLOC6]]
-//      CHECK: %[[ALLOC7:.*]] = alloc()
-// CHECK-NEXT: test.buffer_based in(%[[ALLOC6]]{{.*}} out(%[[ALLOC7]]
-
-// -----
-
-// Test case: Checking BufferizeCallOpConverter and
-// BufferizeFuncOpConverter and BufferizeReturnOpConverter all
-// together. The signature of `callee` after signature conversion would be:
-
-// func @callee(%arg0: memref<5xf32>,%arg1: memref<5xf32>) -> ()
-
-// The operands and results of caller and return operations must be matched
-// respectively.
-
-// CHECK-LABEL: func @callee
-func @callee(%arg1: tensor<5xf32>) -> tensor<5xf32> {
-  %0 = test.tensor_based in(%arg1 : tensor<5xf32>) -> tensor<5xf32>
-  return %0 : tensor<5xf32>
-}
-// CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>,
-// CHECK-SAME: %[[CALLEE_RESULT:.*]]: memref<5xf32>)
-// CHECK: %[[ALLOC:.*]] = alloc()
-// CHECK: test.buffer_based
-// CHECK: test.copy(%[[ALLOC]], %[[CALLEE_RESULT]])
-// CHECK: return
-
-// CHECK-LABEL: func @caller
-func @caller(%arg0: tensor<5xf32>) -> tensor<5xf32> {
-  %x = call @callee(%arg0) : (tensor<5xf32>) -> tensor<5xf32>
-  %y = call @callee(%x) : (tensor<5xf32>) -> tensor<5xf32>
-  return %y : tensor<5xf32>
-}
-// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>,
-// CHECK-SAME: %[[CALLER_RESULT:.*]]: memref<5xf32>)
-// CHECK: %[[FIRST_ALLOC:.*]] = alloc()
-// CHECK: call @callee(%[[CALLER_ARG]], %[[FIRST_ALLOC]])
-// CHECK: %[[SECOND_ALLOC:.*]] = alloc()
-// CHECK: call @callee(%[[FIRST_ALLOC]], %[[SECOND_ALLOC]])
-// CHECK: test.copy(%[[SECOND_ALLOC]], %[[CALLER_RESULT]])
-// CHECK: return
-
-// -----
-
-// Test case: Checking BufferizeCallOpConverter and
-// BufferizeFuncOpConverter and BufferizeReturnOpConverter all
-// together on functions that also have memref typed results. The signature of
-// `callee` after signature conversion would be:
-
-// func @callee(%arg0: memref<5xf32>,%arg1: memref<5xf32>)-> memref<2xf32>
-
-// where %arg0 is the input and %arg1 is the output buffer and the original
-// memref type result remain as the function result. Then, the rewriter should
-// match the caller's signature with the callee. Thus, two buffers will be
-// allocated instead of %x0 and %y0 and they are passed to the callers' operands
-// list as the output buffers. %x1 and %y1 remain as callers' results.
-
 // CHECK-LABEL: func @callee
 func @callee(%arg1: tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>) {
  %buff = alloc() : memref<2xf32>
  return %arg1, %buff : tensor<5xf32>, memref<2xf32>
 }
-//      CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>,
-// CHECK-SAME: %[[CALLEE_RESULT:.*]]: memref<5xf32>)
-// CHECK-SAME: memref<2xf32>
-//      CHECK: %[[ALLOC:.*]] = alloc()
-//      CHECK: test.copy(%[[CALLEE_ARG]], %[[CALLEE_RESULT]])
-//      CHECK: return %[[ALLOC]]
+// CHECK: (%[[CALLEE_ARG:.*]]: memref<5xf32>) -> (memref<5xf32>, memref<2xf32>)
+// CHECK: %[[ALLOC:.*]] = alloc()
+// CHECK: return %[[CALLEE_ARG]], %[[ALLOC]]

 // CHECK-LABEL: func @caller
 func @caller(%arg0: tensor<5xf32>) -> tensor<5xf32> {
-  %x0, %x1 = call @callee(%arg0) : (tensor<5xf32>)
-                                   -> (tensor<5xf32>, memref<2xf32>)
-  %y0, %y1 = call @callee(%x0) : (tensor<5xf32>)
-                                 -> (tensor<5xf32>, memref<2xf32>)
-  return %y0 : tensor<5xf32>
+  %x:2 = call @callee(%arg0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>)
+  %y:2 = call @callee(%x#0) : (tensor<5xf32>) -> (tensor<5xf32>, memref<2xf32>)
+  return %y#0 : tensor<5xf32>
 }
-// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>,
-// CHECK-SAME: %[[CALLER_RESULT:.*]]: memref<5xf32>)
-// CHECK: %[[X0:.*]] = alloc()
-// CHECK: %[[X1:.*]] = call @callee(%[[CALLER_ARG]], %[[X0]])
-// CHECK: %[[Y0:.*]] = alloc()
-// CHECK: %[[Y1:.*]] = call @callee(%[[X0]], %[[Y0]])
-// CHECK: test.copy(%[[Y0]], %[[CALLER_RESULT]])
-// CHECK: return
-
-// -----
-
-// CHECK-LABEL: func @func_with_unranked_arg
-func @func_with_unranked_arg(%arg0: tensor<*xf32>) {
-  return
-}
-// CHECK-SAME: ([[ARG:%.*]]: memref<*xf32>)
+// CHECK: (%[[CALLER_ARG:.*]]: memref<5xf32>) -> memref<5xf32>
+// CHECK: %[[X:.*]]:2 = call @callee(%[[CALLER_ARG]])
+// CHECK: %[[Y:.*]]:2 = call @callee(%[[X]]#0)
+// CHECK: return %[[Y]]#0

 // -----

 // Test case: Testing BufferizeCallOpConverter to see if it matches with the
 // signature of the new signature of the callee function when there are tuple
-// typed args and results. BufferizeTypeConverter is set to flatten tuple
-// typed arguments. The tuple typed values should be decomposed and composed
-// using get_tuple_element and make_tuple operations of test dialect. Tensor
-// types are converted to Memref. Memref typed function results are appended to
-// the function arguments list.
+// typed args and results. BufferizeTypeConverter is set to flatten tuple-typed
+// arguments. The tuple-typed values should be decomposed and composed using the
+// get_tuple_element and make_tuple operations of the test dialect. Tensor types
+// are converted to memref types. Memref-typed function results remain as
+// function results.

 // CHECK-LABEL: func @callee
-func @callee(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>)
-             -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>){
+func @callee(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>){
  return %arg0 : tuple<tensor<2xf32>,i1, tensor<5xf32>>
 }
-// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1,
-// CHECK-SAME: %[[ARG2:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<2xf32>,
-// CHECK-SAME: %[[RESULT1:.*]]: memref<5xf32>) -> i1
-// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]],
-// CHECK-SAME: %[[ARG2]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 2 : i32}
-// CHECK-NEXT: test.copy(%[[FIRST_ELEM]], %[[RESULT0]])
-// CHECK-NEXT: test.copy(%[[THIRD_ELEM]], %[[RESULT1]])
-// CHECK-NEXT: return %[[SECOND_ELEM]]
+// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>)
+// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>)
+// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
+// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[TUPLE]]) {index = 2 : i32}
+// CHECK-NEXT: return %[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]]

 // CHECK-LABEL: func @caller
-func @caller(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>)
-             -> tuple<tensor<2xf32>,i1, tensor<5xf32>>{
-  %x0 = call @callee(%arg0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
-                              -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
-  %y0 = call @callee(%x0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
-                            -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
+func @caller(%arg0: tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> tuple<tensor<2xf32>,i1, tensor<5xf32>>{
+  %x0 = call @callee(%arg0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
+  %y0 = call @callee(%x0) : (tuple<tensor<2xf32>,i1, tensor<5xf32>>) -> (tuple<tensor<2xf32>,i1, tensor<5xf32>>)
  return %y0 : tuple<tensor<2xf32>,i1, tensor<5xf32>>
 }
-// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1,
-// CHECK-SAME: %[[ARG2:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<2xf32>,
-// CHECK-SAME: %[[RESULT1:.*]]: memref<5xf32>) -> i1
-// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]],
-// CHECK-SAME: %[[ARG2]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 2 : i32}
-// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc()
-// CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc()
-// CHECK-NEXT: %[[CALLEE_RESULT:.*]] = call @callee(%[[FIRST_ELEM]],
-// CHECK-SAME: %[[SECOND_ELEM]], %[[THIRD_ELEM]], %[[FIRST_ALLOC]],
-// CHECK-SAME: %[[SECOND_ALLOC]])
-// CHECK-SAME: (memref<2xf32>, i1,
-// CHECK-SAME: memref<5xf32>, memref<2xf32>, memref<5xf32>) -> i1
-// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[FIRST_ALLOC]],
-// CHECK-SAME: %[[CALLEE_RESULT]], %[[SECOND_ALLOC]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 2 : i32}
-// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc()
-// CHECK-NEXT: %[[SECOND_ALLOC:.*]] = alloc()
-// CHECK-NEXT: %[[CALLEE_RESULT:.*]] = call @callee(%[[FIRST_ELEM]],
-// CHECK-SAME: %[[SECOND_ELEM]], %[[THIRD_ELEM]], %[[FIRST_ALLOC]],
-// CHECK-SAME: %[[SECOND_ALLOC]])
-// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>, memref<2xf32>, memref<5xf32>)
-// CHECK-SAME: i1
-// CHECK-NEXT: %[[TUPLE:.*]] = "test.make_tuple"(%[[FIRST_ALLOC]],
-// CHECK-SAME: %[[CALLEE_RESULT]], %[[SECOND_ALLOC]])
-// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 1 : i32}
-// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[TUPLE]])
-// CHECK-SAME: {index = 2 : i32}
-// CHECK-NEXT: test.copy(%[[FIRST_ELEM]], %[[RESULT0]])
-// CHECK-NEXT: test.copy(%[[THIRD_ELEM]], %[[RESULT1]])
-// CHECK-NEXT: return %[[SECOND_ELEM]]
+// CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[ARG1:.*]]: i1, %[[ARG2:.*]]: memref<5xf32>)
+// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>)
+// CHECK-NEXT: %[[ARG_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
+// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[ARG_TUPLE]]) {index = 2 : i32}
+// CHECK-NEXT: %[[CALLEE_RESULTS:.*]]:3 = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]])
+// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -> (memref<2xf32>, i1, memref<5xf32>)
+// CHECK-NEXT: %[[RESULT_TUPLE:.*]] = "test.make_tuple"(%[[CALLEE_RESULTS]]#0, %[[CALLEE_RESULTS]]#1, %[[CALLEE_RESULTS]]#2)
+// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[RESULT_TUPLE]]) {index = 2 : i32}
+// CHECK-NEXT: %[[CALLEE_RESULTS:.*]]:3 = call @callee(%[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]])
+// CHECK-SAME: (memref<2xf32>, i1, memref<5xf32>) -> (memref<2xf32>, i1, memref<5xf32>)
+// CHECK-NEXT: %[[RETURN_TUPLE:.*]] = "test.make_tuple"(%[[CALLEE_RESULTS]]#0, %[[CALLEE_RESULTS]]#1, %[[CALLEE_RESULTS]]#2)
+// CHECK-NEXT: %[[FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[THIRD_ELEM:.*]]  = "test.get_tuple_element"(%[[RETURN_TUPLE]]) {index = 2 : i32}
+// CHECK-NEXT: return %[[FIRST_ELEM]], %[[SECOND_ELEM]], %[[THIRD_ELEM]]

 // -----

-// Test case: Testing BufferizeFuncOpConverter and BufferizeReturnOpConverter
-// to see if the return operation matches with the new function signature when
-// there are tuple typed args and results. BufferizeTypeConverter is set to
-// flatten tuple typed arguments. The tuple typed values should be decomposed
-// and composed using get_tuple_element and make_tuple operations of test
-// dialect. Tensor types are converted to Memref. Memref typed function results
-// are appended to the function arguments list.
+// Test case: Testing BufferizeFuncOpConverter and BufferizeReturnOpConverter to
+// see if the return operation matches the new function signature when there
+// are tuple-typed args and results. BufferizeTypeConverter is set to flatten
+// tuple-typed arguments. The tuple-typed values should be decomposed and
+// composed using the get_tuple_element and make_tuple operations of the test
+// dialect. Tensor types are converted to memref types. Memref-typed function
+// results remain as function results.

 // CHECK-LABEL: func @decompose_tuple_typed_function_args_and_results
-func @decompose_tuple_typed_function_args_and_results(%arg0: tuple<i1,f32>,
-                                                      %arg1: tensor<10xf32>,
-                                                      %arg2: tuple<i1,
-                                                             tensor<5xf32>>)
-                                                      -> (tuple<i1,
-                                                                tensor<5xf32>>,
-                                                      tensor<10xf32>,
-                                                            tuple<i1,f32>){
-  return %arg2, %arg1, %arg0 : tuple<i1, tensor<5xf32>>, tensor<10xf32>,
-                                    tuple<i1,f32>
+func @decompose_tuple_typed_function_args_and_results(%arg0: tuple<i1,f32>, %arg1: tensor<10xf32>, %arg2: tuple<i1, tensor<5xf32>>) -> (tuple<i1, tensor<5xf32>>, tensor<10xf32>, tuple<i1,f32>){
+  return %arg2, %arg1, %arg0 : tuple<i1, tensor<5xf32>>, tensor<10xf32>, tuple<i1,f32>
 }
-// CHECK-SAME: %[[ARG0:.*]]: i1, %[[ARG1:.*]]: f32,
-// CHECK-SAME: %[[ARG2:.*]]: memref<10xf32>, %[[ARG3:.*]]: i1,
-// CHECK-SAME: %[[ARG4:.*]]: memref<5xf32>, %[[RESULT0:.*]]: memref<5xf32>,
-// CHECK-SAME: %[[RESULT1:.*]]: memref<10xf32>
-// CHECK-SAME: (i1, i1, f32)
+// CHECK-SAME: %[[ARG0:.*]]: i1, %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref<10xf32>, %[[ARG3:.*]]: i1, %[[ARG4:.*]]: memref<5xf32>
+// CHECK-SAME: (i1, memref<5xf32>, memref<10xf32>, i1, f32)
 // CHECK-NEXT: %[[FIRST_TUPLE:.*]] = "test.make_tuple"(%[[ARG0]], %[[ARG1]])
 // CHECK-NEXT: %[[SECOND_TUPLE:.*]] = "test.make_tuple"(%[[ARG3]], %[[ARG4]])
-// CHECK-NEXT: %[[SECOND_TUPLE_FIRST_ELEM:.*]]  = "test.get_tuple_element"
-// CHECK-SAME: (%[[SECOND_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[SECOND_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"
-// CHECK-SAME: (%[[SECOND_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: %[[FIRST_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"
-// CHECK-SAME: (%[[FIRST_TUPLE]]) {index = 0 : i32}
-// CHECK-NEXT: %[[FIRST_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"
-// CHECK-SAME: (%[[FIRST_TUPLE]]) {index = 1 : i32}
-// CHECK-NEXT: test.copy(%[[SECOND_TUPLE_SECOND_ELEM]], %[[RESULT0]])
-// CHECK-NEXT: test.copy(%[[ARG2]], %[[RESULT1]])
-// CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_FIRST_ELEM]],
-// CHECK-SAME: %[[FIRST_TUPLE_SECOND_ELEM]]
+// CHECK-NEXT: %[[SECOND_TUPLE_FIRST_ELEM:.*]]  = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[SECOND_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[SECOND_TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: %[[FIRST_TUPLE_FIRST_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 0 : i32}
+// CHECK-NEXT: %[[FIRST_TUPLE_SECOND_ELEM:.*]] = "test.get_tuple_element"(%[[FIRST_TUPLE]]) {index = 1 : i32}
+// CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[SECOND_TUPLE_SECOND_ELEM]], %[[ARG2]], %[[FIRST_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_SECOND_ELEM]]
--- a/mlir/test/lib/Transforms/TestFinalizingBufferize.cpp
+++ b/mlir/test/lib/Transforms/TestFinalizingBufferize.cpp
@@ -35,17 +35,9 @@ namespace {
 /// otherwise the IR will end up invalid. Thus, finalizing bufferization passes
 /// require an atomic change to the entire program (e.g. the whole module).
 ///
-/// `allowMemrefFunctionResults` informs the buffer finalization policy to allow
-/// functions that have memref typed results. Patterns involved with converting
-/// func/call/return respect the finalization policy to ensure a consistent
-/// atomic conversion of the entire module. `allowMemrefFunctionResults` also
-/// allows memref typed results to escape from the deallocation.
-///
 /// TODO: Split out BufferizeFinalizationPolicy from BufferizeTypeConverter.
-template <bool allowMemrefFunctionResults>
 struct TestFinalizingBufferizePass
-    : mlir::PassWrapper<TestFinalizingBufferizePass<allowMemrefFunctionResults>,
-                        OperationPass<ModuleOp>> {
+    : mlir::PassWrapper<TestFinalizingBufferizePass, OperationPass<ModuleOp>> {

  /// Converts tensor based test operations to buffer based ones using
  /// bufferize.
@@ -123,13 +115,6 @@ struct TestFinalizingBufferizePass
             converter.isLegal(&funcOp.getBody());
    });

-    auto kind = allowMemrefFunctionResults
-                    ? BufferizeTypeConverter::KeepAsFunctionResult
-                    : BufferizeTypeConverter::AppendToArgumentsList;
-    converter.setResultConversionKind<RankedTensorType, MemRefType>(kind);
-    converter.setResultConversionKind<UnrankedTensorType, UnrankedMemRefType>(
-        kind);
-
    converter.addDecomposeTypeConversion(
        [](TupleType tupleType, SmallVectorImpl<Type> &types) {
          tupleType.getFlattenedTypes(types);
@@ -175,17 +160,8 @@ struct TestFinalizingBufferizePass
 namespace mlir {
 namespace test {
 void registerTestFinalizingBufferizePass() {
-  PassRegistration<
-      TestFinalizingBufferizePass</*allowMemrefFunctionResults=*/false>>(
+  PassRegistration<TestFinalizingBufferizePass>(
      "test-finalizing-bufferize", "Tests finalizing bufferize conversions");
 }
-
-void registerTestPreparationPassWithAllowedMemrefResults() {
-  PassRegistration<
-      TestFinalizingBufferizePass</*allowMemrefFunctionResults=*/true>>(
-      "test-finalizing-bufferize-with-allowed-memref-results",
-      "Tests finalizing buffierize conversions, allowing functions to have "
-      "memref typed results.");
-}
 } // namespace test
 } // namespace mlir
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -146,7 +146,6 @@ void registerTestPasses() {
  test::registerTestMemRefDependenceCheck();
  test::registerTestMemRefStrideCalculation();
  test::registerTestOpaqueLoc();
-  test::registerTestPreparationPassWithAllowedMemrefResults();
  test::registerTestRecursiveTypesPass();
  test::registerTestSCFUtilsPass();
  test::registerTestVectorConversions();