[mlir][tensor] Add tensor.dim operation
* Split memref.dim into two operations: memref.dim and tensor.dim. Both ops have the same builder interface and op argument names, so that they can be used with templates in patterns that apply to both tensors and memrefs (e.g., some patterns in Linalg).
* Add constant materializer to TensorDialect (needed for folding in affine.apply etc.).
* Remove some MemRefDialect dependencies, make some explicit.

Differential Revision: https://reviews.llvm.org/D105165
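The shared builder interface is the point of the split: both ops expose `source()`, `index()`, `getConstantIndex()` and identical `build()` signatures, so a pattern can be written once and instantiated for either dialect. A minimal sketch of that idea (the pattern name `StaticDimToConstant` is illustrative and not part of this patch):

```c++
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/PatternMatch.h"

using namespace mlir;

// Replaces a dim op with a constant index whenever the queried dimension is
// statically known. Works for memref::DimOp and tensor::DimOp alike because
// both expose the same accessors.
template <typename DimOpTy>
struct StaticDimToConstant : public OpRewritePattern<DimOpTy> {
  using OpRewritePattern<DimOpTy>::OpRewritePattern;

  LogicalResult matchAndRewrite(DimOpTy dimOp,
                                PatternRewriter &rewriter) const override {
    Optional<int64_t> index = dimOp.getConstantIndex();
    if (!index)
      return failure();
    auto shapedTy = dimOp.source().getType().template dyn_cast<ShapedType>();
    if (!shapedTy || !shapedTy.hasRank() || shapedTy.isDynamicDim(*index))
      return failure();
    rewriter.replaceOpWithNewOp<ConstantIndexOp>(dimOp,
                                                 shapedTy.getDimSize(*index));
    return success();
  }
};

// Instantiated once per op, e.g.:
//   patterns.add<StaticDimToConstant<memref::DimOp>,
//                StaticDimToConstant<tensor::DimOp>>(context);
```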
@@ -365,7 +365,6 @@ def ConvertShapeToStandard : Pass<"convert-shape-to-std", "ModuleOp"> {
"dialect";
let constructor = "mlir::createConvertShapeToStandardPass()";
let dependentDialects = [
"memref::MemRefDialect",
"StandardOpsDialect",
"scf::SCFDialect",
"tensor::TensorDialect"

@@ -34,6 +34,10 @@ SmallVector<Range, 8> getOrCreateRanges(OffsetSizeAndStrideOpInterface op,
/// Given an operation, retrieves the value of each dynamic dimension through
/// constructing the necessary DimOp operators.
SmallVector<Value, 4> getDynOperands(Location loc, Value val, OpBuilder &b);

// Helper function that creates a memref::DimOp or tensor::DimOp depending on
// the type of `source`.
Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim);
} // namespace mlir
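A short usage sketch for the two helpers declared here (the `allocForValue` wrapper and its result type are assumed for illustration and are not part of the patch; the declarations are assumed visible via the MemRef dialect header):

```c++
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

// Allocates a buffer whose dynamic sizes mirror those of `val`, regardless of
// whether `val` is a tensor or a memref: getDynOperands emits tensor.dim or
// memref.dim as appropriate via createOrFoldDimOp.
static Value allocForValue(OpBuilder &b, Location loc, Value val,
                           MemRefType resultType) {
  SmallVector<Value, 4> dynSizes = getDynOperands(loc, val, b);
  return b.create<memref::AllocOp>(loc, resultType, dynSizes);
}
```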

//===----------------------------------------------------------------------===//

@@ -512,7 +512,7 @@ def MemRef_DeallocOp : MemRef_Op<"dealloc", [MemRefsNormalizable]> {
// DimOp
//===----------------------------------------------------------------------===//

def DimOp : MemRef_Op<"dim", [NoSideEffect, MemRefsNormalizable]> {
def MemRef_DimOp : MemRef_Op<"dim", [NoSideEffect, MemRefsNormalizable]> {
let summary = "dimension index operation";
let description = [{
The `dim` operation takes a memref and a dimension operand of type `index`.
@@ -538,18 +538,17 @@ def DimOp : MemRef_Op<"dim", [NoSideEffect, MemRefsNormalizable]> {
```
}];

let arguments = (ins AnyTypeOf<[AnyTensor, AnyRankedOrUnrankedMemRef],
"any memref or tensor type">:$memrefOrTensor,
let arguments = (ins AnyRankedOrUnrankedMemRef:$source,
Index:$index);
let results = (outs Index:$result);

let assemblyFormat = [{
attr-dict $memrefOrTensor `,` $index `:` type($memrefOrTensor)
attr-dict $source `,` $index `:` type($source)
}];

let builders = [
OpBuilder<(ins "Value":$memrefOrTensor, "int64_t":$index)>,
OpBuilder<(ins "Value":$memrefOrTensor, "Value":$index)>
OpBuilder<(ins "Value":$source, "int64_t":$index)>,
OpBuilder<(ins "Value":$source, "Value":$index)>
];

let extraClassDeclaration = [{
@@ -1288,6 +1287,7 @@ def TensorLoadOp : MemRef_Op<"tensor_load",

let assemblyFormat = "$memref attr-dict `:` type($memref)";

let hasCanonicalizer = 1;
let hasFolder = 1;
}

@@ -14,7 +14,7 @@ include "mlir/Pass/PassBase.td"
def StdBufferize : FunctionPass<"std-bufferize"> {
let summary = "Bufferize the std dialect";
let constructor = "mlir::createStdBufferizePass()";
let dependentDialects = ["scf::SCFDialect"];
let dependentDialects = ["memref::MemRefDialect", "scf::SCFDialect"];
}

def StdExpandOps : FunctionPass<"std-expand"> {

@@ -14,6 +14,7 @@ include "mlir/IR/OpBase.td"
def Tensor_Dialect : Dialect {
let name = "tensor";
let cppNamespace = "::mlir::tensor";

let description = [{
The `tensor` dialect is intended to hold core tensor creation and
manipulation ops, which are not strongly associated with any particular
@@ -43,6 +44,8 @@ def Tensor_Dialect : Dialect {
dialect), and does not live in this dialect.

}];

let hasConstantMaterializer = 1;
}

#endif // TENSOR_BASE

@@ -60,6 +60,58 @@ def Tensor_CastOp : Tensor_Op<"cast", [
let verifier = ?;
}

//===----------------------------------------------------------------------===//
// DimOp
//===----------------------------------------------------------------------===//

def Tensor_DimOp : Tensor_Op<"dim", [NoSideEffect]> {
let summary = "dimension index operation";
let description = [{
The `dim` operation takes a tensor and a dimension operand of type `index`.
It returns the size of the requested dimension of the given tensor.
If the dimension index is out of bounds, the behavior is undefined.

The specified tensor type is that of the first operand.

Example:

```mlir
// Always returns 4, can be constant folded:
%c0 = constant 0 : index
%x = tensor.dim %A, %c0 : tensor<4x?xf32>

// Returns the dynamic dimension of %A.
%c1 = constant 1 : index
%y = tensor.dim %A, %c1 : tensor<4x?xf32>

// Equivalent generic form:
%x = "tensor.dim"(%A, %c0) : (tensor<4x?xf32>, index) -> index
%y = "tensor.dim"(%A, %c1) : (tensor<4x?xf32>, index) -> index
```
}];

let arguments = (ins AnyTensor:$source,
Index:$index);
let results = (outs Index:$result);

let assemblyFormat = [{
attr-dict $source `,` $index `:` type($source)
}];

let builders = [
OpBuilder<(ins "Value":$source, "int64_t":$index)>,
OpBuilder<(ins "Value":$source, "Value":$index)>
];

let extraClassDeclaration = [{
/// Helper function to get the index as a simple integer if it is constant.
Optional<int64_t> getConstantIndex();
}];

let hasCanonicalizer = 1;
let hasFolder = 1;
}
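From C++, the two builders added above mirror the memref.dim ones; the `int64_t` overload creates the index constant itself. A minimal sketch (the helper `lastDimSize` is hypothetical, not part of the patch):

```c++
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

// Queries the size of the last dimension of a ranked tensor value.
static Value lastDimSize(OpBuilder &b, Location loc, Value tensor) {
  auto type = tensor.getType().cast<RankedTensorType>();
  // The int64_t builder materializes the ConstantIndexOp for the index;
  // createOrFold lets static sizes fold away immediately.
  return b.createOrFold<tensor::DimOp>(loc, tensor, type.getRank() - 1);
}
```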

//===----------------------------------------------------------------------===//
// ExtractOp
//===----------------------------------------------------------------------===//

@@ -14,7 +14,7 @@ include "mlir/Pass/PassBase.td"
def TensorBufferize : FunctionPass<"tensor-bufferize"> {
let summary = "Bufferize the `tensor` dialect";
let constructor = "mlir::createTensorBufferizePass()";
let dependentDialects = ["scf::SCFDialect"];
let dependentDialects = ["scf::SCFDialect", "memref::MemRefDialect"];
}

#endif // MLIR_DIALECT_TENSOR_TRANSFORMS_PASSES

@@ -18,7 +18,6 @@ add_mlir_conversion_library(MLIRShapeToStandard

LINK_LIBS PUBLIC
MLIRIR
MLIRMemRef
MLIRShape
MLIRTensor
MLIRPass

@@ -9,7 +9,6 @@
#include "mlir/Conversion/ShapeToStandard/ShapeToStandard.h"

#include "../PassDetail.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/Shape/IR/Shape.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
@@ -140,7 +139,7 @@ LogicalResult BroadcastOpConverter::matchAndRewrite(
// dimension in the tensor.
SmallVector<Value> ranks, rankDiffs;
llvm::append_range(ranks, llvm::map_range(transformed.shapes(), [&](Value v) {
return lb.create<memref::DimOp>(v, zero);
return lb.create<tensor::DimOp>(v, zero);
}));

// Find the maximum rank
@@ -254,7 +253,7 @@ LogicalResult IsBroadcastableOpConverter::matchAndRewrite(
// dimension in the tensor.
SmallVector<Value> ranks, rankDiffs;
llvm::append_range(ranks, llvm::map_range(transformed.shapes(), [&](Value v) {
return lb.create<memref::DimOp>(v, zero);
return lb.create<tensor::DimOp>(v, zero);
}));

// Find the maximum rank
@@ -346,7 +345,7 @@ LogicalResult GetExtentOpConverter::matchAndRewrite(
// circumvents the necessity to materialize the shape in memory.
if (auto shapeOfOp = op.shape().getDefiningOp<ShapeOfOp>()) {
if (shapeOfOp.arg().getType().isa<ShapedType>()) {
rewriter.replaceOpWithNewOp<memref::DimOp>(op, shapeOfOp.arg(),
rewriter.replaceOpWithNewOp<tensor::DimOp>(op, shapeOfOp.arg(),
transformed.dim());
return success();
}
@@ -377,7 +376,7 @@ RankOpConverter::matchAndRewrite(shape::RankOp op, ArrayRef<Value> operands,
return failure();

shape::RankOp::Adaptor transformed(operands);
rewriter.replaceOpWithNewOp<memref::DimOp>(op, transformed.shape(), 0);
rewriter.replaceOpWithNewOp<tensor::DimOp>(op, transformed.shape(), 0);
return success();
}

@@ -407,7 +406,7 @@ ReduceOpConverter::matchAndRewrite(shape::ReduceOp op, ArrayRef<Value> operands,
Value one = rewriter.create<ConstantIndexOp>(loc, 1);
Type indexTy = rewriter.getIndexType();
Value rank =
rewriter.create<memref::DimOp>(loc, indexTy, transformed.shape(), zero);
rewriter.create<tensor::DimOp>(loc, indexTy, transformed.shape(), zero);

auto loop = rewriter.create<scf::ForOp>(
loc, zero, rank, one, op.initVals(),
@@ -494,11 +493,11 @@ ShapeEqOpConverter::matchAndRewrite(ShapeEqOp op, ArrayRef<Value> operands,
Value zero = rewriter.create<ConstantIndexOp>(loc, 0);
Value firstShape = transformed.shapes().front();
Value firstRank =
rewriter.create<memref::DimOp>(loc, indexTy, firstShape, zero);
rewriter.create<tensor::DimOp>(loc, indexTy, firstShape, zero);
Value result = nullptr;
// Generate a linear sequence of compares, all with firstShape as lhs.
for (Value shape : transformed.shapes().drop_front(1)) {
Value rank = rewriter.create<memref::DimOp>(loc, indexTy, shape, zero);
Value rank = rewriter.create<tensor::DimOp>(loc, indexTy, shape, zero);
Value eqRank =
rewriter.create<CmpIOp>(loc, CmpIPredicate::eq, firstRank, rank);
auto same = rewriter.create<IfOp>(
@@ -563,7 +562,7 @@ LogicalResult ShapeOfOpConversion::matchAndRewrite(
int64_t rank = rankedTensorTy.getRank();
for (int64_t i = 0; i < rank; i++) {
if (rankedTensorTy.isDynamicDim(i)) {
Value extent = rewriter.create<memref::DimOp>(loc, tensor, i);
Value extent = rewriter.create<tensor::DimOp>(loc, tensor, i);
extentValues.push_back(extent);
} else {
Value extent =
@@ -587,7 +586,7 @@ LogicalResult ShapeOfOpConversion::matchAndRewrite(
op, getExtentTensorType(ctx), ValueRange{rank},
[&](OpBuilder &b, Location loc, ValueRange args) {
Value dim = args.front();
Value extent = b.create<memref::DimOp>(loc, tensor, dim);
Value extent = b.create<tensor::DimOp>(loc, tensor, dim);
b.create<tensor::YieldOp>(loc, extent);
});

@@ -617,7 +616,7 @@ LogicalResult SplitAtOpConversion::matchAndRewrite(
SplitAtOp::Adaptor transformed(op);
ImplicitLocOpBuilder b(op.getLoc(), rewriter);
Value zero = b.create<ConstantIndexOp>(0);
Value rank = b.create<memref::DimOp>(transformed.operand(), zero);
Value rank = b.create<tensor::DimOp>(transformed.operand(), zero);

// index < 0 ? index + rank : index
Value originalIndex = transformed.index();
@@ -675,8 +674,8 @@ void ConvertShapeToStandardPass::runOnOperation() {
// Setup target legality.
MLIRContext &ctx = getContext();
ConversionTarget target(ctx);
target.addLegalDialect<memref::MemRefDialect, StandardOpsDialect, SCFDialect,
tensor::TensorDialect>();
target
.addLegalDialect<StandardOpsDialect, SCFDialect, tensor::TensorDialect>();
target.addLegalOp<CstrRequireOp, FuncOp, ModuleOp>();

// Setup conversion patterns.

@@ -2965,7 +2965,7 @@ struct DimOpLowering : public ConvertOpToLLVMPattern<memref::DimOp> {
LogicalResult
matchAndRewrite(memref::DimOp dimOp, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override {
Type operandType = dimOp.memrefOrTensor().getType();
Type operandType = dimOp.source().getType();
if (operandType.isa<UnrankedMemRefType>()) {
rewriter.replaceOp(dimOp, {extractSizeOfUnrankedMemRef(
operandType, dimOp, operands, rewriter)});
@@ -2977,7 +2977,7 @@ struct DimOpLowering : public ConvertOpToLLVMPattern<memref::DimOp> {
operandType, dimOp, operands, rewriter)});
return success();
}
return failure();
llvm_unreachable("expected MemRefType or UnrankedMemRefType");
}

private:
@@ -2995,7 +2995,7 @@ private:
// Extract pointer to the underlying ranked descriptor and bitcast it to a
// memref<element_type> descriptor pointer to minimize the number of GEP
// operations.
UnrankedMemRefDescriptor unrankedDesc(transformed.memrefOrTensor());
UnrankedMemRefDescriptor unrankedDesc(transformed.source());
Value underlyingRankedDesc = unrankedDesc.memRefDescPtr(rewriter, loc);
Value scalarMemRefDescPtr = rewriter.create<LLVM::BitcastOp>(
loc,
@@ -3033,7 +3033,7 @@ private:
int64_t i = index.getValue();
if (memRefType.isDynamicDim(i)) {
// extract dynamic size from the memref descriptor.
MemRefDescriptor descriptor(transformed.memrefOrTensor());
MemRefDescriptor descriptor(transformed.source());
return descriptor.size(rewriter, loc, i);
}
// Use constant for static size.
@@ -3042,7 +3042,7 @@ private:
}
Value index = dimOp.index();
int64_t rank = memRefType.getRank();
MemRefDescriptor memrefDescriptor(transformed.memrefOrTensor());
MemRefDescriptor memrefDescriptor(transformed.source());
return memrefDescriptor.size(rewriter, loc, index, rank);
}
};

@@ -14,7 +14,6 @@ add_mlir_conversion_library(MLIRTosaToLinalg
MLIRLinalg
MLIRLinalgUtils
MLIRMath
MLIRMemRef
MLIRPass
MLIRTensor
MLIRTosa

@@ -13,7 +13,6 @@
#include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
@@ -1720,12 +1719,12 @@ struct ConcatConverter : public OpConversionPattern<tosa::ConcatOp> {
offsets.resize(rank, rewriter.create<ConstantIndexOp>(loc, 0));

for (int i = 0; i < rank; ++i) {
sizes.push_back(rewriter.create<memref::DimOp>(loc, args[0], i));
sizes.push_back(rewriter.create<tensor::DimOp>(loc, args[0], i));
}

Value resultDimSize = sizes[axis];
for (auto arg : args.drop_front()) {
auto size = rewriter.create<memref::DimOp>(loc, arg, axisValue);
auto size = rewriter.create<tensor::DimOp>(loc, arg, axisValue);
resultDimSize = rewriter.create<AddIOp>(loc, resultDimSize, size);
}
sizes[axis] = resultDimSize;
@@ -1739,7 +1738,7 @@ struct ConcatConverter : public OpConversionPattern<tosa::ConcatOp> {
rewriter.create<linalg::FillOp>(loc, zeroVal, init).getResult(0);

for (auto arg : args) {
sizes[axis] = rewriter.create<memref::DimOp>(loc, arg, axisValue);
sizes[axis] = rewriter.create<tensor::DimOp>(loc, arg, axisValue);
result = rewriter.create<tensor::InsertSliceOp>(loc, arg, result, offsets,
sizes, strides);
offsets[axis] = rewriter.create<AddIOp>(loc, offsets[axis], sizes[axis]);

@@ -14,7 +14,6 @@
#include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
@@ -34,15 +33,14 @@ struct TosaToLinalgOnTensors
public:
void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<linalg::LinalgDialect, math::MathDialect,
memref::MemRefDialect, StandardOpsDialect,
tensor::TensorDialect>();
StandardOpsDialect, tensor::TensorDialect>();
}

void runOnFunction() override {
RewritePatternSet patterns(&getContext());
ConversionTarget target(getContext());
target.addLegalDialect<linalg::LinalgDialect, memref::MemRefDialect,
StandardOpsDialect, tensor::TensorDialect>();
target.addLegalDialect<linalg::LinalgDialect, StandardOpsDialect,
tensor::TensorDialect>();
target.addIllegalDialect<tosa::TosaDialect>();

// Not every TOSA op can be legalized to linalg.

@@ -166,7 +166,7 @@ static Value generateInBoundsCheck(
Location loc = xferOp.getLoc();
ImplicitLocOpBuilder lb(xferOp.getLoc(), b);
if (!xferOp.isDimInBounds(0) && !isBroadcast) {
Value memrefDim = lb.create<memref::DimOp>(xferOp.source(), *dim);
Value memrefDim = createOrFoldDimOp(b, loc, xferOp.source(), *dim);
AffineExpr d0, d1;
bindDims(xferOp.getContext(), d0, d1);
Value base = xferOp.indices()[dim.getValue()];

@@ -60,13 +60,14 @@ remainsLegalAfterInline(Value value, Region *src, Region *dest,
if (value.isa<BlockArgument>())
return legalityCheck(mapping.lookup(value), dest);

// If it's a top-level value beacuse it's defined in the region,
// If it's a top-level value because it's defined in the region,
// it can only be inlined if the defining op is a constant or a
// `dim`, which can appear anywhere and be valid, since the defining
// op won't be top-level anymore after inlining.
Attribute operandCst;
return matchPattern(value.getDefiningOp(), m_Constant(&operandCst)) ||
value.getDefiningOp<memref::DimOp>();
value.getDefiningOp<memref::DimOp>() ||
value.getDefiningOp<tensor::DimOp>();
}

/// Checks if all values known to be legal affine dimensions or symbols in `src`
@@ -298,7 +299,9 @@ bool mlir::isValidDim(Value value, Region *region) {
// The dim op is okay if its operand memref/tensor is defined at the top
// level.
if (auto dimOp = dyn_cast<memref::DimOp>(op))
return isTopLevelValue(dimOp.memrefOrTensor());
return isTopLevelValue(dimOp.source());
if (auto dimOp = dyn_cast<tensor::DimOp>(op))
return isTopLevelValue(dimOp.source());
return false;
}

@@ -319,14 +322,15 @@ static bool isMemRefSizeValidSymbol(AnyMemRefDefOp memrefDefOp, unsigned index,
}

/// Returns true if the result of the dim op is a valid symbol for `region`.
static bool isDimOpValidSymbol(memref::DimOp dimOp, Region *region) {
// The dim op is okay if its operand memref is defined at the top level.
if (isTopLevelValue(dimOp.memrefOrTensor()))
template <typename OpTy>
static bool isDimOpValidSymbol(OpTy dimOp, Region *region) {
// The dim op is okay if its source is defined at the top level.
if (isTopLevelValue(dimOp.source()))
return true;

// Conservatively handle remaining BlockArguments as non-valid symbols.
// E.g. scf.for iterArgs.
if (dimOp.memrefOrTensor().isa<BlockArgument>())
if (dimOp.source().template isa<BlockArgument>())
return false;

// The dim op is also okay if its operand memref is a view/subview whose
@@ -335,7 +339,7 @@ static bool isDimOpValidSymbol(memref::DimOp dimOp, Region *region) {
assert(index.hasValue() &&
"expect only `dim` operations with a constant index");
int64_t i = index.getValue();
return TypeSwitch<Operation *, bool>(dimOp.memrefOrTensor().getDefiningOp())
return TypeSwitch<Operation *, bool>(dimOp.source().getDefiningOp())
.Case<memref::ViewOp, memref::SubViewOp, memref::AllocOp>(
[&](auto op) { return isMemRefSizeValidSymbol(op, i, region); })
.Default([](Operation *) { return false; });
@@ -364,7 +368,7 @@ bool mlir::isValidSymbol(Value value) {
return false;
}

/// A value can be used as a symbol for `region` iff it meets onf of the the
/// A value can be used as a symbol for `region` iff it meets one of the
/// following conditions:
/// *) It is a constant.
/// *) It is the result of an affine apply operation with symbol arguments.
@@ -407,6 +411,8 @@ bool mlir::isValidSymbol(Value value, Region *region) {
// Dim op results could be valid symbols at any level.
if (auto dimOp = dyn_cast<memref::DimOp>(defOp))
return isDimOpValidSymbol(dimOp, region);
if (auto dimOp = dyn_cast<tensor::DimOp>(defOp))
return isDimOpValidSymbol(dimOp, region);

// Check for values dominating `region`'s parent op.
Operation *regionOp = region ? region->getParentOp() : nullptr;

@@ -196,7 +196,7 @@ SmallVector<Value, 4> LinalgOp::createFlatListOfOperandDims(OpBuilder &b,
SmallVector<Value, 4> res;
for (OpOperand *opOperand : getInputAndOutputOperands()) {
for (int64_t i = 0, e = getRank(opOperand); i < e; ++i)
res.push_back(b.createOrFold<memref::DimOp>(loc, opOperand->get(), i));
res.push_back(createOrFoldDimOp(b, loc, opOperand->get(), i));
}
return res;
}
@@ -305,8 +305,7 @@ LogicalResult LinalgOp::reifyReturnTypeShapesPerResultDim(
SmallVector<Value> shapes;
for (int64_t dim : llvm::seq<int64_t>(0, getRank(opOperand))) {
if (checkDimExpr.visit(shapeExprs[pos]))
shapes.push_back(
b.createOrFold<memref::DimOp>(loc, opOperand->get(), dim));
shapes.push_back(createOrFoldDimOp(b, loc, opOperand->get(), dim));
else
shapes.push_back(allResultDimValues[pos]);
pos++;

@@ -925,7 +925,7 @@ PadTensorOp PadTensorOp::createPadHighOp(Type type, Value source, Value pad,
assert(rankedTensorType.hasStaticShape());
int rank = rankedTensorType.getRank();
for (int i = 0; i < rank; ++i) {
auto dimOp = builder.createOrFold<memref::DimOp>(loc, source, i);
auto dimOp = builder.createOrFold<tensor::DimOp>(loc, source, i);
auto resultDimSize = builder.createOrFold<ConstantIndexOp>(
loc, rankedTensorType.getDimSize(i));
auto highValue = builder.createOrFold<SubIOp>(loc, resultDimSize, dimOp);
@@ -945,7 +945,7 @@ LogicalResult PadTensorOp::reifyReturnTypeShapesPerResultDim(
for (auto dim : llvm::seq<int64_t>(0, getSourceType().getRank())) {
// Shape along each dimension is source dim + low pad + high pad.
SmallVector<Value> mapOperands;
mapOperands.push_back(b.createOrFold<memref::DimOp>(loc, source(), dim));
mapOperands.push_back(b.createOrFold<tensor::DimOp>(loc, source(), dim));
AffineExpr expr = b.getAffineDimExpr(0);
unsigned numSymbols = 0;
auto addOpFoldResult = [&](OpFoldResult valueOrAttr) {
@@ -1545,7 +1545,7 @@ getCollapsedOutputDimFromInputShape(OpBuilder &builder, Location loc,
AffineExpr expr;
SmallVector<Value, 2> dynamicDims;
for (auto dim : llvm::seq(startPos, endPos + 1)) {
dynamicDims.push_back(builder.createOrFold<memref::DimOp>(loc, src, dim));
dynamicDims.push_back(builder.createOrFold<tensor::DimOp>(loc, src, dim));
AffineExpr currExpr = builder.getAffineSymbolExpr(dim - startPos);
expr = (expr ? expr * currExpr : currExpr);
}
@@ -1614,7 +1614,7 @@ static OpFoldResult getExpandedOutputDimFromInputShape(
"dimensions");
linearizedStaticDim *= d.value();
}
Value sourceDim = builder.create<memref::DimOp>(loc, src, sourceDimPos);
Value sourceDim = builder.create<tensor::DimOp>(loc, src, sourceDimPos);
return applyMapToValues(
builder, loc,
AffineMap::get(

@@ -1186,8 +1186,7 @@ static Value createNewAllocDeallocPairForShapedValue(OpBuilder &b, Location loc,
SmallVector<Value> dynShape;
for (auto dim : enumerate(memRefType.getShape()))
if (dim.value() == ShapedType::kDynamicSize)
dynShape.push_back(
b.create<memref::DimOp>(loc, shapedValue, dim.index()));
dynShape.push_back(createOrFoldDimOp(b, loc, shapedValue, dim.index()));

Value allocated = b.create<memref::AllocOp>(loc, allocMemRefType, dynShape);
Value casted = allocated;
@@ -1304,14 +1303,14 @@ static LogicalResult bufferize(OpBuilder &b, LinalgOp op,

/// DimOp tensor operand is modified inplace. This allows leaving dead
/// tensors behind that will get DCE'd.
static LogicalResult bufferize(OpBuilder &b, memref::DimOp dimOp,
static LogicalResult bufferize(OpBuilder &b, tensor::DimOp dimOp,
BlockAndValueMapping &bvm,
const BufferizationAliasInfo &aliasInfo) {
if (dimOp.memrefOrTensor().getType().isa<RankedTensorType>()) {
Value v = lookup(bvm, dimOp.memrefOrTensor());
if (dimOp.source().getType().isa<RankedTensorType>()) {
Value v = lookup(bvm, dimOp.source());
if (!v)
return failure();
dimOp.memrefOrTensorMutable().assign(v);
dimOp.sourceMutable().assign(v);
}
return success();
}
@@ -1814,8 +1813,8 @@ bufferizeFuncOpInternals(FuncOp funcOp, BlockAndValueMapping &bvm,
.Case<memref::BufferCastOp,
memref::TensorLoadOp>(
[&](auto) { return success(); })
.Case<memref::DimOp,
scf::ForOp,
.Case<scf::ForOp,
tensor::DimOp,
LinalgOp,
ReturnOp,
ExtractSliceOp,

@@ -187,8 +187,7 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
<< loopRanges.back() << "\n");
} else {
auto shapeDim = getShapeDefiningLoopRange(producer, i);
Value dim = b.createOrFold<memref::DimOp>(loc, shapeDim.shape,
shapeDim.dimension);
Value dim = createOrFoldDimOp(b, loc, shapeDim.shape, shapeDim.dimension);
tileSizes.push_back(zero);
sizeBounds.push_back(dim);
loopRanges.push_back(Range{zero, dim, one});

@@ -1344,7 +1344,7 @@ struct RemoveOutsDependency : public OpRewritePattern<GenericOp> {
for (auto dim : llvm::enumerate(operandType.getShape())) {
if (dim.value() != ShapedType::kDynamicSize)
continue;
dynamicDims.push_back(rewriter.createOrFold<memref::DimOp>(
dynamicDims.push_back(rewriter.createOrFold<tensor::DimOp>(
loc, operandVal, dim.index()));
}
Value initTensor = rewriter.create<InitTensorOp>(

@@ -182,7 +182,7 @@ Value getPaddedInput(OpBuilder &b, Location loc, Value input,
conds.push_back(leftOutOfBound);
else
conds.push_back(b.create<OrOp>(loc, conds.back(), leftOutOfBound));
Value rightBound = b.create<memref::DimOp>(loc, input, idx);
Value rightBound = createOrFoldDimOp(b, loc, input, idx);
Value rightOutOfBound =
b.create<CmpIOp>(loc, CmpIPredicate::sge, dim, rightBound);
conds.push_back(b.create<OrOp>(loc, conds.back(), rightOutOfBound));
@@ -558,6 +558,7 @@ static void lowerLinalgToLoopsImpl(FuncOp funcOp) {
RewritePatternSet patterns(context);
patterns.add<LinalgRewritePattern<LoopType>>(context);
memref::DimOp::getCanonicalizationPatterns(patterns, context);
tensor::DimOp::getCanonicalizationPatterns(patterns, context);
AffineApplyOp::getCanonicalizationPatterns(patterns, context);
patterns.add<FoldAffineOp>(context);
// Just apply the patterns greedily.

@@ -204,7 +204,7 @@ static LogicalResult rewriteAsPaddedOp(PatternRewriter &rewriter,
SmallVector<OpFoldResult> offsets(rank, rewriter.getIndexAttr(0));
auto sizes = llvm::to_vector<4>(llvm::map_range(
llvm::seq<unsigned>(0, rank), [&](unsigned d) -> OpFoldResult {
auto dimOp = rewriter.create<memref::DimOp>(loc, std::get<0>(it), d);
auto dimOp = rewriter.create<tensor::DimOp>(loc, std::get<0>(it), d);
newUsersOfOpToPad.insert(dimOp);
return dimOp.getResult();
}));
@@ -786,8 +786,8 @@ LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite(
auto low = asValue(rewriter, loc, padOp.getMixedLowPad()[dim]);
auto offset = asValue(rewriter, loc, sliceOp.getMixedOffsets()[dim]);
auto length = asValue(rewriter, loc, sliceOp.getMixedSizes()[dim]);
auto srcSize = rewriter.createOrFold<memref::DimOp>(
loc, padOp.source(), dim);
auto srcSize =
rewriter.createOrFold<tensor::DimOp>(loc, padOp.source(), dim);

// The new amount of low padding is `low - offset`. Except for the case
// where none of the low padding is read. In that case, the new amount of

@@ -702,7 +702,7 @@ struct GenericPadTensorOpVectorizationPattern
SmallVector<int64_t> staticSizes;
for (unsigned dim = 0; dim < resultType.getRank(); ++dim) {
if (resultType.isDynamicDim(dim)) {
auto srcSize = rewriter.createOrFold<memref::DimOp>(
auto srcSize = rewriter.createOrFold<tensor::DimOp>(
padOp.getLoc(), padOp.source(), dim);
// Add low and high padding value.
auto plusLow = rewriter.createOrFold<AddIOp>(
@@ -732,7 +732,7 @@ struct GenericPadTensorOpVectorizationPattern
SmallVector<OpFoldResult> srcSizes;
for (unsigned dim = 0; dim < sourceType.getRank(); ++dim) {
if (sourceType.isDynamicDim(dim)) {
srcSizes.push_back(rewriter.createOrFold<memref::DimOp>(
srcSizes.push_back(rewriter.createOrFold<tensor::DimOp>(
padOp.getLoc(), padOp.source(), dim));
} else {
srcSizes.push_back(rewriter.getIndexAttr(sourceType.getDimSize(dim)));

@@ -176,8 +176,8 @@ IntegerAttr getSmallestBoundingIndex(Value size) {
.getResult(0)
.dyn_cast<AffineConstantExpr>())
boundingConst = cExpr.getValue();
} else if (auto dimOp = size.getDefiningOp<memref::DimOp>()) {
auto shape = dimOp.memrefOrTensor().getType().dyn_cast<ShapedType>();
} else if (auto dimOp = size.getDefiningOp<tensor::DimOp>()) {
auto shape = dimOp.source().getType().dyn_cast<ShapedType>();
if (auto constOp = dimOp.index().getDefiningOp<ConstantOp>()) {
if (auto indexAttr = constOp.value().dyn_cast<IntegerAttr>()) {
auto dimIndex = indexAttr.getInt();
@@ -566,7 +566,7 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for dim#" << r);
if (!isTiled(map.getSubMap({r}), tileSizes)) {
offsets.push_back(b.getIndexAttr(0));
Value dim = b.createOrFold<memref::DimOp>(loc, shapedOp, r);
Value dim = createOrFoldDimOp(b, loc, shapedOp, r);
sizes.push_back(dim);
strides.push_back(b.getIndexAttr(1));
LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
@@ -603,7 +603,7 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
AffineMap::inferFromExprList(
ArrayRef<ArrayRef<AffineExpr>>{{dim0, dim1 - dim2}})
.front();
Value d = b.create<memref::DimOp>(loc, shapedOp, r);
Value d = createOrFoldDimOp(b, loc, shapedOp, r);
SmallVector<Value, 4> operands{size, d, offset};
fullyComposeAffineMapAndOperands(&minMap, &operands);
size = b.create<AffineMinOp>(loc, b.getIndexType(), minMap, operands);

@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Transforms/InliningUtils.h"

using namespace mlir;
@@ -37,12 +38,24 @@ SmallVector<Value, 4> mlir::getDynOperands(Location loc, Value val,
SmallVector<Value, 4> dynOperands;
auto shapedType = val.getType().cast<ShapedType>();
for (auto dim : llvm::enumerate(shapedType.getShape())) {
if (dim.value() == MemRefType::kDynamicSize)
dynOperands.push_back(b.create<memref::DimOp>(loc, val, dim.index()));
if (dim.value() == ShapedType::kDynamicSize)
dynOperands.push_back(createOrFoldDimOp(b, loc, val, dim.index()));
}
return dynOperands;
}

// Helper function that creates a memref::DimOp or tensor::DimOp depending on
// the type of `source`.
// TODO: Move helper function out of MemRef dialect.
Value mlir::createOrFoldDimOp(OpBuilder &b, Location loc, Value source,
int64_t dim) {
if (source.getType().isa<UnrankedMemRefType, MemRefType>())
return b.createOrFold<memref::DimOp>(loc, source, dim);
if (source.getType().isa<UnrankedTensorType, RankedTensorType>())
return b.createOrFold<tensor::DimOp>(loc, source, dim);
llvm_unreachable("Expected MemRefType or TensorType");
}
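A sketch of how call sites can use this dispatch helper (the function below is an assumed example, not from the patch); the same code now handles both buffer and tensor SSA values:

```c++
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

// Collects the size of every dimension of `source`, which may be a ranked
// memref or a ranked tensor; statically known sizes fold to constants.
static SmallVector<Value, 4> getAllDimSizes(OpBuilder &b, Location loc,
                                            Value source) {
  auto shapedTy = source.getType().cast<ShapedType>();
  SmallVector<Value, 4> sizes;
  for (int64_t i = 0, e = shapedTy.getRank(); i < e; ++i)
    sizes.push_back(createOrFoldDimOp(b, loc, source, i));
  return sizes;
}
```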

void mlir::memref::MemRefDialect::initialize() {
addOperations<DmaStartOp, DmaWaitOp,
#define GET_OP_LIST

@@ -602,17 +602,17 @@ LogicalResult DeallocOp::fold(ArrayRef<Attribute> cstOperands,
// DimOp
//===----------------------------------------------------------------------===//

void DimOp::build(OpBuilder &builder, OperationState &result, Value memref,
void DimOp::build(OpBuilder &builder, OperationState &result, Value source,
int64_t index) {
auto loc = result.location;
Value indexValue = builder.create<ConstantIndexOp>(loc, index);
build(builder, result, memref, indexValue);
build(builder, result, source, indexValue);
}

void DimOp::build(OpBuilder &builder, OperationState &result, Value memref,
void DimOp::build(OpBuilder &builder, OperationState &result, Value source,
Value index) {
auto indexTy = builder.getIndexType();
build(builder, result, indexTy, memref, index);
build(builder, result, indexTy, source, index);
}

Optional<int64_t> DimOp::getConstantIndex() {
@@ -628,14 +628,11 @@ static LogicalResult verify(DimOp op) {
return success();

// Check that constant index is not knowingly out of range.
auto type = op.memrefOrTensor().getType();
auto type = op.source().getType();
if (auto memrefType = type.dyn_cast<MemRefType>()) {
if (index.getValue() >= memrefType.getRank())
return op.emitOpError("index is out of range");
} else if (auto tensorType = type.dyn_cast<RankedTensorType>()) {
if (index.getValue() >= tensorType.getRank())
return op.emitOpError("index is out of range");
} else if (type.isa<UnrankedMemRefType>() || type.isa<UnrankedTensorType>()) {
} else if (type.isa<UnrankedMemRefType>()) {
// Assume index to be in range.
} else {
llvm_unreachable("expected operand with memref type");
@@ -644,63 +641,27 @@ static LogicalResult verify(DimOp op) {
}

OpFoldResult DimOp::fold(ArrayRef<Attribute> operands) {
auto index = operands[1].dyn_cast_or_null<IntegerAttr>();

// All forms of folding require a known index.
auto index = operands[1].dyn_cast_or_null<IntegerAttr>();
if (!index)
return {};

auto argTy = memrefOrTensor().getType();
// Folding for unranked types (UnrankedMemRefType) is not supported.
auto memrefType = source().getType().dyn_cast<MemRefType>();
if (!memrefType)
return {};

// Fold if the shape extent along the given index is known.
if (auto shapedTy = argTy.dyn_cast<ShapedType>()) {
// Folding for unranked types (UnrankedMemRefType) is not supported.
if (!shapedTy.hasRank())
return {};
if (!shapedTy.isDynamicDim(index.getInt())) {
Builder builder(getContext());
return builder.getIndexAttr(shapedTy.getShape()[index.getInt()]);
}
}

Operation *definingOp = memrefOrTensor().getDefiningOp();

// dim(memref.tensor_load(memref)) -> dim(memref)
if (auto tensorLoadOp = dyn_cast_or_null<TensorLoadOp>(definingOp)) {
setOperand(0, tensorLoadOp.memref());
return getResult();
}

// Fold dim to the operand of tensor.generate.
if (auto fromElements = dyn_cast_or_null<tensor::GenerateOp>(definingOp)) {
auto resultType =
fromElements.getResult().getType().cast<RankedTensorType>();
// The case where the type encodes the size of the dimension is handled
// above.
assert(resultType.getShape()[index.getInt()] ==
RankedTensorType::kDynamicSize);

// Find the operand of the fromElements that corresponds to this index.
auto dynExtents = fromElements.dynamicExtents().begin();
for (auto dim : resultType.getShape().take_front(index.getInt()))
if (dim == RankedTensorType::kDynamicSize)
dynExtents++;

return Value{*dynExtents};
if (!memrefType.isDynamicDim(index.getInt())) {
Builder builder(getContext());
return builder.getIndexAttr(memrefType.getShape()[index.getInt()]);
}

// The size at the given index is now known to be a dynamic size.
unsigned unsignedIndex = index.getValue().getZExtValue();

if (auto sliceOp = dyn_cast_or_null<tensor::ExtractSliceOp>(definingOp)) {
assert(sliceOp.isDynamicSize(unsignedIndex) &&
"Expected dynamic slice size");
return sliceOp.getDynamicSize(unsignedIndex);
}

// Fold dim to the size argument for an `AllocOp`, `ViewOp`, or `SubViewOp`.
auto memrefType = argTy.dyn_cast<MemRefType>();
if (!memrefType)
return {};
Operation *definingOp = source().getDefiningOp();

if (auto alloc = dyn_cast_or_null<AllocOp>(definingOp))
return *(alloc.getDynamicSizes().begin() +
@@ -736,7 +697,7 @@ struct DimOfMemRefReshape : public OpRewritePattern<DimOp> {

LogicalResult matchAndRewrite(DimOp dim,
PatternRewriter &rewriter) const override {
auto reshape = dim.memrefOrTensor().getDefiningOp<ReshapeOp>();
auto reshape = dim.source().getDefiningOp<ReshapeOp>();

if (!reshape)
return failure();
@@ -753,18 +714,17 @@ struct DimOfMemRefReshape : public OpRewritePattern<DimOp> {
}
};

/// Fold dim of a dim of a cast into the dim of the source of the tensor cast.
template <typename CastOpTy>
/// Fold dim of a cast into the dim of the source of the memref cast.
struct DimOfCastOp : public OpRewritePattern<DimOp> {
using OpRewritePattern<DimOp>::OpRewritePattern;

LogicalResult matchAndRewrite(DimOp dimOp,
PatternRewriter &rewriter) const override {
auto castOp = dimOp.memrefOrTensor().getDefiningOp<CastOpTy>();
auto castOp = dimOp.source().getDefiningOp<BufferCastOp>();
if (!castOp)
return failure();
Value newSource = castOp.getOperand();
rewriter.replaceOpWithNewOp<DimOp>(dimOp, newSource, dimOp.index());
rewriter.replaceOpWithNewOp<tensor::DimOp>(dimOp, newSource, dimOp.index());
return success();
}
};
@@ -772,8 +732,7 @@ struct DimOfCastOp : public OpRewritePattern<DimOp> {

void DimOp::getCanonicalizationPatterns(RewritePatternSet &results,
MLIRContext *context) {
results.add<DimOfMemRefReshape, DimOfCastOp<BufferCastOp>,
DimOfCastOp<tensor::CastOp>>(context);
results.add<DimOfMemRefReshape, DimOfCastOp>(context);
}

// ---------------------------------------------------------------------------

@@ -1956,6 +1915,28 @@ OpFoldResult TensorLoadOp::fold(ArrayRef<Attribute>) {
return {};
}

namespace {
struct DimOfTensorLoadFolder : public OpRewritePattern<tensor::DimOp> {
using OpRewritePattern<tensor::DimOp>::OpRewritePattern;

LogicalResult matchAndRewrite(tensor::DimOp dimOp,
PatternRewriter &rewriter) const override {
auto tensorLoadOp = dimOp.source().getDefiningOp<TensorLoadOp>();
if (!tensorLoadOp)
return failure();

rewriter.replaceOpWithNewOp<DimOp>(dimOp, tensorLoadOp.memref(),
dimOp.index());
return success();
}
};
} // namespace

void TensorLoadOp::getCanonicalizationPatterns(RewritePatternSet &results,
MLIRContext *context) {
results.add<DimOfTensorLoadFolder>(context);
}

//===----------------------------------------------------------------------===//
// TransposeOp
//===----------------------------------------------------------------------===//

@@ -69,12 +69,13 @@ static Value getResultDimFromShapeInterface(OpBuilder &builder, OpResult result,

namespace {
/// Fold dim of an operation that implements the InferShapedTypeOpInterface
struct DimOfShapedTypeOpInterface : public OpRewritePattern<memref::DimOp> {
using OpRewritePattern<memref::DimOp>::OpRewritePattern;
template <typename OpTy>
struct DimOfShapedTypeOpInterface : public OpRewritePattern<OpTy> {
using OpRewritePattern<OpTy>::OpRewritePattern;

LogicalResult matchAndRewrite(memref::DimOp dimOp,
LogicalResult matchAndRewrite(OpTy dimOp,
PatternRewriter &rewriter) const override {
OpResult dimValue = dimOp.memrefOrTensor().dyn_cast<OpResult>();
OpResult dimValue = dimOp.source().template dyn_cast<OpResult>();
if (!dimValue)
return failure();
auto shapedTypeOp =
@@ -111,7 +112,10 @@ struct ResolveShapedTypeResultDimsPass final

void memref::populateResolveShapedTypeResultDimsPatterns(
RewritePatternSet &patterns) {
patterns.add<DimOfShapedTypeOpInterface>(patterns.getContext());
// TODO: Move tensor::DimOp pattern to the Tensor dialect.
patterns.add<DimOfShapedTypeOpInterface<memref::DimOp>,
DimOfShapedTypeOpInterface<tensor::DimOp>>(
patterns.getContext());
}

void ResolveShapedTypeResultDimsPass::runOnOperation() {

@@ -99,11 +99,11 @@ public:

/// Sparse conversion rule for dimension accesses.
class SparseTensorToDimSizeConverter
: public OpConversionPattern<memref::DimOp> {
: public OpConversionPattern<tensor::DimOp> {
public:
using OpConversionPattern::OpConversionPattern;
LogicalResult
matchAndRewrite(memref::DimOp op, ArrayRef<Value> operands,
matchAndRewrite(tensor::DimOp op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override {
if (!operands[0].getType().isa<LLVM::LLVMPointerType>())
return failure();

@@ -377,7 +377,7 @@ static bool genBuffers(Merger &merger, CodeGen &codegen,
// Find lower and upper bound in current dimension.
Value up;
if (shape[d] == MemRefType::kDynamicSize) {
up = rewriter.create<memref::DimOp>(loc, t->get(), d);
up = createOrFoldDimOp(rewriter, loc, t->get(), d);
args.push_back(up);
} else {
up = rewriter.create<ConstantIndexOp>(loc, shape[d]);

@@ -16,20 +16,21 @@
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/StandardOps/Transforms/Passes.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/Transforms/DialectConversion.h"

using namespace mlir;

namespace {
class BufferizeDimOp : public OpConversionPattern<memref::DimOp> {
class BufferizeDimOp : public OpConversionPattern<tensor::DimOp> {
public:
using OpConversionPattern::OpConversionPattern;
LogicalResult
matchAndRewrite(memref::DimOp op, ArrayRef<Value> operands,
matchAndRewrite(tensor::DimOp op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override {
memref::DimOp::Adaptor adaptor(operands);
rewriter.replaceOpWithNewOp<memref::DimOp>(op, adaptor.memrefOrTensor(),
tensor::DimOp::Adaptor adaptor(operands);
rewriter.replaceOpWithNewOp<memref::DimOp>(op, adaptor.source(),
adaptor.index());
return success();
}
@@ -94,8 +95,6 @@ struct StdBufferizePass : public StdBufferizeBase<StdBufferizePass> {
return typeConverter.isLegal(op.getType()) ||
!op.condition().getType().isa<IntegerType>();
});
target.addDynamicallyLegalOp<memref::DimOp>(
[&](memref::DimOp op) { return typeConverter.isLegal(op); });
if (failed(
applyPartialConversion(getFunction(), target, std::move(patterns))))
signalPassFailure();

@@ -19,6 +19,14 @@
using namespace mlir;
using namespace mlir::tensor;

/// Materialize a single constant operation from a given attribute value with
/// the desired resultant type.
Operation *TensorDialect::materializeConstant(OpBuilder &builder,
Attribute value, Type type,
Location loc) {
return builder.create<mlir::ConstantOp>(loc, type, value);
}
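The materializer is what lets folding machinery (e.g. `createOrFold`, canonicalization, and the affine.apply folding mentioned in the commit message) turn an attribute produced by a fold back into an SSA value. A small sketch of the effect (illustrative values, not from the patch):

```c++
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

// For a value of type tensor<4x?xf32>, dimension 0 is static: DimOp::fold
// returns an index attribute, and the TensorDialect constant materializer
// turns it into `constant 4 : index` instead of leaving a tensor.dim behind.
static Value staticDimFoldsToConstant(OpBuilder &b, Location loc,
                                      Value tensor4xDyn) {
  return b.createOrFold<tensor::DimOp>(loc, tensor4xDyn, /*index=*/0);
}
```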

//===----------------------------------------------------------------------===//
// CastOp
//===----------------------------------------------------------------------===//
@@ -184,6 +192,123 @@ void CastOp::getCanonicalizationPatterns(RewritePatternSet &results,
results.add<ChainedTensorCast>(context);
}

//===----------------------------------------------------------------------===//
// DimOp
//===----------------------------------------------------------------------===//

void DimOp::build(OpBuilder &builder, OperationState &result, Value source,
int64_t index) {
auto loc = result.location;
Value indexValue = builder.create<ConstantIndexOp>(loc, index);
build(builder, result, source, indexValue);
}

void DimOp::build(OpBuilder &builder, OperationState &result, Value source,
Value index) {
auto indexTy = builder.getIndexType();
build(builder, result, indexTy, source, index);
}

Optional<int64_t> DimOp::getConstantIndex() {
if (auto constantOp = index().getDefiningOp<ConstantOp>())
return constantOp.getValue().cast<IntegerAttr>().getInt();
return {};
}

static LogicalResult verify(DimOp op) {
// Assume unknown index to be in range.
Optional<int64_t> index = op.getConstantIndex();
if (!index.hasValue())
return success();

// Check that constant index is not knowingly out of range.
auto type = op.source().getType();
if (auto tensorType = type.dyn_cast<RankedTensorType>()) {
if (index.getValue() >= tensorType.getRank())
return op.emitOpError("index is out of range");
} else if (type.isa<UnrankedTensorType>()) {
// Assume index to be in range.
} else {
llvm_unreachable("expected operand with tensor type");
}
return success();
}

OpFoldResult DimOp::fold(ArrayRef<Attribute> operands) {
// All forms of folding require a known index.
auto index = operands[1].dyn_cast_or_null<IntegerAttr>();
if (!index)
return {};

// Folding for unranked types (UnrankedTensorType) is not supported.
auto tensorType = source().getType().dyn_cast<RankedTensorType>();
if (!tensorType)
return {};

// Fold if the shape extent along the given index is known.
if (!tensorType.isDynamicDim(index.getInt())) {
Builder builder(getContext());
return builder.getIndexAttr(tensorType.getShape()[index.getInt()]);
}

Operation *definingOp = source().getDefiningOp();

// Fold dim to the operand of tensor.generate.
if (auto fromElements = dyn_cast_or_null<tensor::GenerateOp>(definingOp)) {
auto resultType =
fromElements.getResult().getType().cast<RankedTensorType>();
// The case where the type encodes the size of the dimension is handled
// above.
assert(resultType.getShape()[index.getInt()] ==
RankedTensorType::kDynamicSize);

// Find the operand of the fromElements that corresponds to this index.
auto dynExtents = fromElements.dynamicExtents().begin();
for (auto dim : resultType.getShape().take_front(index.getInt()))
if (dim == RankedTensorType::kDynamicSize)
dynExtents++;

return Value{*dynExtents};
}

// The size at the given index is now known to be a dynamic size.
unsigned unsignedIndex = index.getValue().getZExtValue();

if (auto sliceOp = dyn_cast_or_null<tensor::ExtractSliceOp>(definingOp)) {
assert(sliceOp.isDynamicSize(unsignedIndex) &&
"Expected dynamic slice size");
return sliceOp.getDynamicSize(unsignedIndex);
}

// dim(cast) -> dim
if (succeeded(foldTensorCast(*this)))
return getResult();

return {};
}

namespace {
/// Fold dim of a cast into the dim of the source of the tensor cast.
struct DimOfCastOp : public OpRewritePattern<DimOp> {
using OpRewritePattern<DimOp>::OpRewritePattern;

LogicalResult matchAndRewrite(DimOp dimOp,
PatternRewriter &rewriter) const override {
auto castOp = dimOp.source().getDefiningOp<CastOp>();
if (!castOp)
return failure();
Value newSource = castOp.getOperand();
rewriter.replaceOpWithNewOp<DimOp>(dimOp, newSource, dimOp.index());
return success();
}
};
} // end anonymous namespace.

void DimOp::getCanonicalizationPatterns(RewritePatternSet &results,
MLIRContext *context) {
results.add<DimOfCastOp>(context);
}

//===----------------------------------------------------------------------===//
// ExtractOp
//===----------------------------------------------------------------------===//

@@ -157,7 +157,8 @@ struct TensorBufferizePass : public TensorBufferizeBase<TensorBufferizePass> {
target.addIllegalOp<tensor::CastOp, tensor::ExtractOp,
tensor::FromElementsOp, tensor::GenerateOp>();
target.addLegalDialect<memref::MemRefDialect>();
target.addLegalDialect<StandardOpsDialect>();
target.addDynamicallyLegalDialect<StandardOpsDialect>(
[&](Operation *op) { return typeConverter.isLegal(op); });
target.addLegalDialect<scf::SCFDialect>();

if (failed(

@@ -13,6 +13,10 @@

namespace mlir {

namespace memref {
class MemRefDialect;
} // end namespace memref

namespace scf {
class SCFDialect;
} // end namespace scf

@@ -2300,7 +2300,7 @@ static Value createInBoundsCond(OpBuilder &b,
Value sum =
makeComposedAffineApply(b, loc, d0 + vs, xferOp.indices()[indicesIdx]);
Value cond = createFoldedSLE(
b, sum, lb.create<memref::DimOp>(xferOp.source(), indicesIdx));
b, sum, createOrFoldDimOp(b, loc, xferOp.source(), indicesIdx));
if (!cond)
return;
// Conjunction over all dims for which we are in-bounds.
@@ -2385,7 +2385,8 @@ static Value createSubViewIntersection(OpBuilder &b,
auto isaWrite = isa<vector::TransferWriteOp>(xferOp);
xferOp.zipResultAndIndexing([&](int64_t resultIdx, int64_t indicesIdx) {
using MapList = ArrayRef<ArrayRef<AffineExpr>>;
Value dimMemRef = lb.create<memref::DimOp>(xferOp.source(), indicesIdx);
Value dimMemRef =
createOrFoldDimOp(b, xferOp.getLoc(), xferOp.source(), indicesIdx);
Value dimAlloc = lb.create<memref::DimOp>(alloc, resultIdx);
Value index = xferOp.indices()[indicesIdx];
AffineExpr i, j, k;
@@ -3923,7 +3924,7 @@ public:
unsigned vecWidth = vtp.getNumElements();
unsigned lastIndex = llvm::size(xferOp.indices()) - 1;
Value off = xferOp.indices()[lastIndex];
Value dim = rewriter.create<memref::DimOp>(loc, xferOp.source(), lastIndex);
Value dim = createOrFoldDimOp(rewriter, loc, xferOp.source(), lastIndex);
Value mask = buildVectorComparison(
rewriter, xferOp, enableIndexOptimizations, vecWidth, dim, &off);

@@ -31,7 +31,7 @@ func @binary_ops_on_size(%lhs : !shape.size, %rhs : !shape.size) {
// CHECK-SAME: (%[[SHAPE:.*]]: tensor<?xindex>) -> index
func @rank(%shape : tensor<?xindex>) -> index {
// CHECK: %[[C0:.*]] = constant 0 : index
// CHECK: %[[RESULT:.*]] = memref.dim %[[SHAPE]], %[[C0]]
// CHECK: %[[RESULT:.*]] = tensor.dim %[[SHAPE]], %[[C0]]
// CHECK: return %[[RESULT]] : index
%rank = shape.rank %shape : tensor<?xindex> -> index
return %rank : index
@@ -60,12 +60,12 @@ func @rank(%shape : !shape.shape) {

// -----

// Express `get_extent` as `memref.dim` when it relies directly on the outcome of a
// Express `get_extent` as `tensor.dim` when it relies directly on the outcome of a
// `shape_of` operation.
// CHECK-LABEL: @get_extent_shape_of
// CHECK-SAME: (%[[ARG:.*]]: tensor<2x3xf32>, %[[IDX:.*]]: index) -> index
func @get_extent_shape_of(%arg : tensor<2x3xf32>, %idx : index) -> index {
// CHECK: %[[RESULT:.*]] = memref.dim %[[ARG]], %[[IDX]] : tensor<2x3xf32>
// CHECK: %[[RESULT:.*]] = tensor.dim %[[ARG]], %[[IDX]] : tensor<2x3xf32>
// CHECK: return %[[RESULT]] : index
%shape = shape.shape_of %arg : tensor<2x3xf32> -> tensor<?xindex>
%result = shape.get_extent %shape, %idx : tensor<?xindex>, index -> index
@@ -178,7 +178,7 @@ func @shape_reduce(%shape : tensor<?xindex>) -> index {
// CHECK-NEXT: %[[INIT:.*]] = constant 1 : index
// CHECK-NEXT: %[[C0:.*]] = constant 0 : index
// CHECK-NEXT: %[[C1:.*]] = constant 1 : index
// CHECK-NEXT: %[[RANK:.*]] = memref.dim %[[SHAPE]], %[[C0]] : tensor<?xindex>
// CHECK-NEXT: %[[RANK:.*]] = tensor.dim %[[SHAPE]], %[[C0]] : tensor<?xindex>
// CHECK-NEXT: %[[RESULT:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] iter_args(%[[ACC:.*]] = %[[INIT]]) -> (index)
// CHECK-NEXT: %[[EXTENT:.*]] = tensor.extract %[[SHAPE]][%[[I]]]
// CHECK-NEXT: %[[NEW_ACC:.*]] = muli %[[ACC]], %[[EXTENT]] : index
@@ -206,7 +206,7 @@ func @shape_of_unranked(%arg : tensor<*xf32>) {
// CHECK: %[[RANK:.*]] = rank %[[ARG]] : tensor<*xf32>
// CHECK: %[[SHAPE:.*]] = tensor.generate %[[RANK]] {
// CHECK: ^bb0(%[[I:.*]]: index):
// CHECK: %[[EXTENT:.*]] = memref.dim %[[ARG]], %[[I]] : tensor<*xf32>
// CHECK: %[[EXTENT:.*]] = tensor.dim %[[ARG]], %[[I]] : tensor<*xf32>
// CHECK: yield %[[EXTENT]] : index
// CHECK: } : tensor<?xindex>
%shape = shape.shape_of %arg : tensor<*xf32> -> tensor<?xindex>
@@ -258,7 +258,7 @@ func @shape_of_dyn(%arg : tensor<1x5x?xf32>) {
// CHECK-DAG: %[[C1:.*]] = constant 1 : index
// CHECK-DAG: %[[C5:.*]] = constant 5 : index
// CHECK-DAG: %[[C2:.*]] = constant 2 : index
// CHECK-DAG: %[[DYN_DIM:.*]] = memref.dim %[[ARG]], %[[C2]] : tensor<1x5x?xf32>
// CHECK-DAG: %[[DYN_DIM:.*]] = tensor.dim %[[ARG]], %[[C2]] : tensor<1x5x?xf32>
// CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor.from_elements %[[C1]], %[[C5]], %[[DYN_DIM]] : tensor<3xindex>
%shape = shape.shape_of %arg : tensor<1x5x?xf32> -> tensor<?xindex>
return
@@ -270,8 +270,8 @@ func @shape_of_dyn(%arg : tensor<1x5x?xf32>) {
// CHECK-SAME: (%[[A:.*]]: tensor<?xindex>, %[[B:.*]]: tensor<?xindex>) -> i1
func @shape_eq(%a : tensor<?xindex>, %b : tensor<?xindex>) -> i1 {
// CHECK: %[[C0:.*]] = constant 0 : index
// CHECK: %[[RANK_A:.*]] = memref.dim %[[A]], %[[C0]] : tensor<?xindex>
// CHECK: %[[RANK_B:.*]] = memref.dim %[[B]], %[[C0]] : tensor<?xindex>
// CHECK: %[[RANK_A:.*]] = tensor.dim %[[A]], %[[C0]] : tensor<?xindex>
// CHECK: %[[RANK_B:.*]] = tensor.dim %[[B]], %[[C0]] : tensor<?xindex>
// CHECK: %[[RANK_EQ:.*]] = cmpi eq, %[[RANK_A]], %[[RANK_B]]
// CHECK: %[[SHAPE_EQ:.*]] = scf.if %[[RANK_EQ]] -> (i1) {
// CHECK: %[[C1:.*]] = constant 1 : index
@@ -299,8 +299,8 @@ func @shape_eq(%a : tensor<?xindex>, %b : tensor<?xindex>) -> i1 {
// CHECK-SAME: (%[[A:.*]]: tensor<?xindex>, %[[B:.*]]: tensor<?xindex>, %[[C:.*]]: tensor<?xindex>) -> i1
func @shape_eq(%a : tensor<?xindex>, %b : tensor<?xindex>, %c : tensor<?xindex>) -> i1 {
// CHECK: %[[C0:.*]] = constant 0 : index
// CHECK: %[[RANK_A:.*]] = memref.dim %[[A]], %[[C0]] : tensor<?xindex>
// CHECK: %[[RANK_B:.*]] = memref.dim %[[B]], %[[C0]] : tensor<?xindex>
// CHECK: %[[RANK_A:.*]] = tensor.dim %[[A]], %[[C0]] : tensor<?xindex>
// CHECK: %[[RANK_B:.*]] = tensor.dim %[[B]], %[[C0]] : tensor<?xindex>
// CHECK: %[[RANK_EQ:.*]] = cmpi eq, %[[RANK_A]], %[[RANK_B]]
// CHECK: %[[SHAPE_EQ:.*]] = scf.if %[[RANK_EQ]] -> (i1) {
// CHECK: %[[C1:.*]] = constant 1 : index
@@ -317,7 +317,7 @@ func @shape_eq(%a : tensor<?xindex>, %b : tensor<?xindex>, %c : tensor<?xindex>)
// CHECK: %[[SHAPE_EQ_INNER:.*]] = constant false
// CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1
// CHECK: }
// CHECK: %[[RANK_C:.*]] = memref.dim %[[C]], %[[C0]] : tensor<?xindex>
// CHECK: %[[RANK_C:.*]] = tensor.dim %[[C]], %[[C0]] : tensor<?xindex>
// CHECK: %[[RANK_EQ:.*]] = cmpi eq, %[[RANK_A]], %[[RANK_C]]
// CHECK: %[[SHAPE_EQ2:.*]] = scf.if %[[RANK_EQ]] -> (i1) {
// CHECK: %[[C1:.*]] = constant 1 : index
@@ -362,9 +362,9 @@ func @try_is_broadcastable (%a : tensor<2xindex>, %b : tensor<3xindex>, %c : ten
// CHECK-SAME: %[[ARG2:.*]]: tensor<2xindex>)
// CHECK: %[[C0:.*]] = constant 0 : index
// CHECK: %[[C1:.*]] = constant 1 : index
// CHECK: %[[RANK0:.*]] = memref.dim %[[ARG0]], %[[C0]] : tensor<2xindex>
// CHECK: %[[RANK1:.*]] = memref.dim %[[ARG1]], %[[C0]] : tensor<3xindex>
// CHECK: %[[RANK2:.*]] = memref.dim %[[ARG2]], %[[C0]] : tensor<2xindex>
// CHECK: %[[RANK0:.*]] = tensor.dim %[[ARG0]], %[[C0]] : tensor<2xindex>
// CHECK: %[[RANK1:.*]] = tensor.dim %[[ARG1]], %[[C0]] : tensor<3xindex>
// CHECK: %[[RANK2:.*]] = tensor.dim %[[ARG2]], %[[C0]] : tensor<2xindex>
// CHECK: %[[CMP0:.*]] = cmpi ugt, %[[RANK1]], %[[RANK0]] : index
// CHECK: %[[LARGER_DIM:.*]] = select %[[CMP0]], %[[RANK1]], %[[RANK0]] : index
// CHECK: %[[CMP1:.*]] = cmpi ugt, %[[RANK2]], %[[LARGER_DIM]] : index
@@ -452,9 +452,9 @@ func @broadcast(%a : tensor<2xindex>, %b : tensor<3xindex>, %c : tensor<2xindex>
// CHECK-SAME: %[[ARG2:.*]]: tensor<2xindex>)
// CHECK: %[[C0:.*]] = constant 0 : index
// CHECK: %[[C1:.*]] = constant 1 : index
// CHECK: %[[RANK0:.*]] = memref.dim %[[ARG0]], %[[C0]] : tensor<2xindex>
// CHECK: %[[RANK1:.*]] = memref.dim %[[ARG1]], %[[C0]] : tensor<3xindex>
// CHECK: %[[RANK2:.*]] = memref.dim %[[ARG2]], %[[C0]] : tensor<2xindex>
// CHECK: %[[RANK0:.*]] = tensor.dim %[[ARG0]], %[[C0]] : tensor<2xindex>
// CHECK: %[[RANK1:.*]] = tensor.dim %[[ARG1]], %[[C0]] : tensor<3xindex>
// CHECK: %[[RANK2:.*]] = tensor.dim %[[ARG2]], %[[C0]] : tensor<2xindex>
// CHECK: %[[CMP0:.*]] = cmpi ugt, %[[RANK1]], %[[RANK0]] : index
// CHECK: %[[LARGER_DIM:.*]] = select %[[CMP0]], %[[RANK1]], %[[RANK0]] : index
// CHECK: %[[CMP1:.*]] = cmpi ugt, %[[RANK2]], %[[LARGER_DIM]] : index
@@ -544,9 +544,9 @@ func @broadcast_3_shapes_different_extents(%a : tensor<2xindex>,
// CHECK-SAME: %[[ARG1:.*]]: tensor<3xindex>,
// CHECK-SAME: %[[ARG2:.*]]: tensor<2xindex>) {
// CHECK: %[[C0:.*]] = constant 0 : index
// CHECK: %[[RANK0:.*]] = memref.dim %[[ARG0]], %[[C0]] : tensor<2xindex>
// CHECK: %[[RANK1:.*]] = memref.dim %[[ARG1]], %[[C0]] : tensor<3xindex>
// CHECK: %[[RANK2:.*]] = memref.dim %[[ARG2]], %[[C0]] : tensor<2xindex>
// CHECK: %[[RANK0:.*]] = tensor.dim %[[ARG0]], %[[C0]] : tensor<2xindex>
// CHECK: %[[RANK1:.*]] = tensor.dim %[[ARG1]], %[[C0]] : tensor<3xindex>
// CHECK: %[[RANK2:.*]] = tensor.dim %[[ARG2]], %[[C0]] : tensor<2xindex>
// CHECK: %[[CMP0:.*]] = cmpi ugt, %[[RANK1]], %[[RANK0]] : index
// CHECK: %[[LARGER_DIM:.*]] = select %[[CMP0]], %[[RANK1]], %[[RANK0]] : index
// CHECK: %[[CMP1:.*]] = cmpi ugt, %[[RANK2]], %[[LARGER_DIM]] : index
@@ -611,7 +611,7 @@ func @broadcast_to_known_rank(%a : tensor<1xindex>, %b : tensor<3xindex>)
// CHECK-SAME: %[[SHAPE:.*]]: tensor<?xindex>, %[[INDEX:.*]]: index
func @split_at(%shape: tensor<?xindex>, %index: index) -> (tensor<?xindex>, tensor<?xindex>) {
// CHECK-NEXT: %[[C0:.*]] = constant 0 : index
// CHECK-NEXT: %[[RANK:.*]] = memref.dim %[[SHAPE]], %[[C0]] : tensor<?xindex>
// CHECK-NEXT: %[[RANK:.*]] = tensor.dim %[[SHAPE]], %[[C0]] : tensor<?xindex>
// CHECK-NEXT: %[[POSINDEX:.*]] = addi %[[INDEX]], %[[RANK]] : index
// CHECK-NEXT: %[[ISNEG:.*]] = cmpi slt, %[[INDEX]], %[[C0]] : index
// CHECK-NEXT: %[[SELECT:.*]] = select %[[ISNEG]], %[[POSINDEX]], %[[INDEX]] : index

@@ -670,18 +670,18 @@ func @concat(%arg0: tensor<5x1xf32>, %arg1: tensor<6x1xf32>) -> () {
// CHECK: [[STRIDE:%.+]] = constant 1
// CHECK: [[OFFSET:%.+]] = constant 0 : index
// CHECK: [[IDX0:%.+]] = constant 0 : index
// CHECK: [[ARG0_DIM0:%.+]] = memref.dim %arg0, [[IDX0]]
// CHECK: [[ARG0_DIM0:%.+]] = tensor.dim %arg0, [[IDX0]]
// CHECK: [[IDX1:%.+]] = constant 1 : index
// CHECK: [[ARG0_DIM1:%.+]] = memref.dim %arg0, [[IDX1]]
// CHECK: [[ARG1_AXIS:%.+]] = memref.dim %arg1, [[AXIS]]
// CHECK: [[ARG0_DIM1:%.+]] = tensor.dim %arg0, [[IDX1]]
// CHECK: [[ARG1_AXIS:%.+]] = tensor.dim %arg1, [[AXIS]]
// CHECK: [[RESULT_AXIS:%.+]] = addi [[ARG0_DIM0]], [[ARG1_AXIS]]
// CHECK: [[INIT:%.+]] = linalg.init_tensor [11, 1]
// CHECK: [[CST:%.+]] = constant 0.0
// CHECK: [[FILL:%.+]] = linalg.fill([[CST]], [[INIT]])
// CHECK: [[ARG0_DIM0:%.+]] = memref.dim %arg0, [[AXIS]]
// CHECK: [[ARG0_DIM0:%.+]] = tensor.dim %arg0, [[AXIS]]
// CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
// CHECK: [[NEW_OFFSET:%.+]] = addi [[OFFSET]], [[ARG0_DIM0]]
// CHECK: [[ARG1_DIM0:%.+]] = memref.dim %arg1, [[AXIS]]
// CHECK: [[ARG1_DIM0:%.+]] = tensor.dim %arg1, [[AXIS]]
// CHECK: [[INSERT1:%.+]] = tensor.insert_slice %arg1 into [[INSERT0]]{{\[}}[[NEW_OFFSET]], [[OFFSET]]] {{\[}}[[ARG1_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
%0 = "tosa.concat"(%arg0, %arg1) { axis = 0 : i64} : (tensor<5x1xf32>, tensor<6x1xf32>) -> (tensor<11x1xf32>)

@@ -689,18 +689,18 @@ func @concat(%arg0: tensor<5x1xf32>, %arg1: tensor<6x1xf32>) -> () {
// CHECK: [[STRIDE:%.+]] = constant 1
// CHECK: [[OFFSET:%.+]] = constant 0 : index
// CHECK: [[IDX0:%.+]] = constant 0 : index
// CHECK: [[ARG0_DIM0:%.+]] = memref.dim %arg0, [[IDX0]]
// CHECK: [[ARG0_DIM0:%.+]] = tensor.dim %arg0, [[IDX0]]
// CHECK: [[IDX1:%.+]] = constant 1 : index
// CHECK: [[ARG0_DIM1:%.+]] = memref.dim %arg0, [[IDX1]]
// CHECK: [[ARG1_AXIS:%.+]] = memref.dim %arg0, [[AXIS]]
// CHECK: [[ARG0_DIM1:%.+]] = tensor.dim %arg0, [[IDX1]]
// CHECK: [[ARG1_AXIS:%.+]] = tensor.dim %arg0, [[AXIS]]
// CHECK: [[RESULT_AXIS:%.+]] = addi [[ARG0_DIM1]], [[ARG1_AXIS]]
// CHECK: [[INIT:%.+]] = linalg.init_tensor [5, 2]
// CHECK: [[CST:%.+]] = constant 0.0
// CHECK: [[FILL:%.+]] = linalg.fill([[CST]], [[INIT]])
// CHECK: [[ARG0_DIM1:%.+]] = memref.dim %arg0, [[AXIS]]
// CHECK: [[ARG0_DIM1:%.+]] = tensor.dim %arg0, [[AXIS]]
// CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
// CHECK: [[NEW_OFFSET:%.+]] = addi [[OFFSET]], [[ARG0_DIM1]]
// CHECK: [[ARG1_DIM1:%.+]] = memref.dim %arg0, [[AXIS]]
// CHECK: [[ARG1_DIM1:%.+]] = tensor.dim %arg0, [[AXIS]]
// CHECK: [[INSERT1:%.+]] = tensor.insert_slice %arg0 into [[INSERT0]]{{\[}}[[OFFSET]], [[NEW_OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG1_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
%1 = "tosa.concat"(%arg0, %arg0) { axis = 1 : i64} : (tensor<5x1xf32>, tensor<5x1xf32>) -> (tensor<5x2xf32>)
return
@@ -878,20 +878,13 @@ func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: ten

func @pad_float(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) {
%0 = constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32>
// CHECK: [[INDEX0:%.+]] = constant 0 : index
// TODO: Output contains multiple "constant 1 : index".
// CHECK: [[INDEX1:%.+]] = constant 1 : index
// CHECK: [[ROW0:%.+]] = constant 0 : index
// CHECK: [[LOW0:%.+]] = tensor.extract %cst{{\[}}[[ROW0]], [[INDEX0]]]
// CHECK: [[HIGH0:%.+]] = tensor.extract %cst{{\[}}[[ROW0]], [[INDEX1]]]
// CHECK: [[LOW0_IDX:%.+]] = index_cast %0
// CHECK: [[HIGH0_IDX:%.+]] = index_cast %1
// CHECK: [[ROW1:%.+]] = constant 1 : index
// CHECK: [[LOW1:%.+]] = tensor.extract %cst{{\[}}%c1_1, %c0]
// CHECK: [[HIGH1:%.+]] = tensor.extract %cst{{\[}}%c1_1, %c1]
// CHECK: [[LOW1_IDX:%.+]] = index_cast [[LOW1]]
// CHECK: [[HIGH1_IDX:%.+]] = index_cast [[HIGH1]]
// CHECK: [[INDEX2:%.+]] = constant 2 : index
// CHECK: [[INDEX3:%.+]] = constant 3 : index
// CHECK: [[INDEX4:%.+]] = constant 4 : index
// CHECK: [[CST:%.+]] = constant 0.000000e+00 : f32
// CHECK: %8 = linalg.pad_tensor %arg0 low{{\[}}[[LOW0_IDX]], [[LOW1_IDX]]] high{{\[}}[[HIGH0_IDX]], [[HIGH1_IDX]]] {
// CHECK: linalg.pad_tensor %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] {
// CHECK: ^bb0(%arg1: index, %arg2: index): // no predecessors
// CHECK: linalg.yield [[CST]]
// CHECK: } : tensor<1x2xf32> to tensor<4x9xf32>

@@ -101,8 +101,8 @@ func @multiple_results(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
// CHECK-DAG: %[[C1:.*]] = constant 1 : index
// CHECK: %[[MEMREF_ARG:.*]] = memref.buffer_cast %[[ARG]] : memref<?x?xf32>
// CHECK: %[[DIM0:.*]] = memref.dim %[[ARG]], %[[C0]] : tensor<?x?xf32>
// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : tensor<?x?xf32>
// CHECK: %[[DIM0:.*]] = tensor.dim %[[ARG]], %[[C0]] : tensor<?x?xf32>
// CHECK: %[[DIM1:.*]] = tensor.dim %[[ARG]], %[[C1]] : tensor<?x?xf32>
// CHECK: %[[RESULT0:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
// CHECK: %[[RESULT1:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
// CHECK: linalg.generic
@@ -214,8 +214,8 @@ func @bufferize_insert_slice(%t : tensor<?x?xf32>, %st0 : tensor<2x3xf32>, %st1

// CHECK-DAG: %[[M:.*]] = memref.buffer_cast %[[T]] : memref<?x?xf32>
// CHECK-DAG: %[[SM0:.*]] = memref.buffer_cast %[[ST0]] : memref<2x3xf32>
// CHECK-NEXT: %[[DIM0:.*]] = memref.dim %[[T]], %[[C0]] : tensor<?x?xf32>
// CHECK-NEXT: %[[DIM1:.*]] = memref.dim %[[T]], %[[C1]] : tensor<?x?xf32>
// CHECK-NEXT: %[[DIM0:.*]] = tensor.dim %[[T]], %[[C0]] : tensor<?x?xf32>
// CHECK-NEXT: %[[DIM1:.*]] = tensor.dim %[[T]], %[[C1]] : tensor<?x?xf32>
// CHECK-NEXT: %[[M_COPY0:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
// CHECK-NEXT: linalg.copy(%[[M]], %[[M_COPY0]]) : memref<?x?xf32>, memref<?x?xf32>
// CHECK-NEXT: %[[SUBVIEW0:.*]] = memref.subview %[[M_COPY0]][0, 0] [2, 3] [1, 1]

@@ -543,7 +543,7 @@ func @init_tensor_reshape_expansion(%arg0 : index) -> tensor<2x3x5x4x?x7xf32> {
// CHECK-SAME: %[[ARG0:.+]]: index
// CHECK: %[[C2:.+]] = constant 2
// CHECK: %[[INIT1:.+]] = linalg.init_tensor [6, 5, %[[ARG0]]]
// CHECK: %[[D0:.+]] = memref.dim %[[INIT1]], %[[C2]]
// CHECK: %[[D0:.+]] = tensor.dim %[[INIT1]], %[[C2]]
// CHECK: %[[T0:.+]] = affine.apply #[[MAP]]()[%[[D0]]]
// CHECK: %[[INIT2:.+]] = linalg.init_tensor [2, 3, 5, 4, %[[T0]], 7]
// CHECK: return %[[INIT2]]
@@ -561,7 +561,7 @@ func @init_tensor_reshape_collapse(%arg0 : index) -> tensor<6x5x?xf32> {
// CHECK-SAME: %[[ARG0:.+]]: index
// CHECK: %[[C4:.+]] = constant 4
// CHECK: %[[INIT1:.+]] = linalg.init_tensor [2, 3, 5, 4, %[[ARG0]], 7]
// CHECK: %[[D0:.+]] = memref.dim %[[INIT1]], %[[C4]]
// CHECK: %[[D0:.+]] = tensor.dim %[[INIT1]], %[[C4]]
// CHECK: %[[T0:.+]] = affine.apply #[[MAP]]()[%[[D0]]]
// CHECK: %[[INIT2:.+]] = linalg.init_tensor [6, 5, %[[T0]]]
// CHECK: return %[[INIT2]]
@@ -574,9 +574,9 @@ func @remove_no_op(%arg0 : tensor<?x?x?xf32>, %arg1 : tensor<?x?x?xf32>)
%c0 = constant 0 : index
%c1 = constant 1 : index
%c2 = constant 2 : index
%0 = memref.dim %arg0, %c0 : tensor<?x?x?xf32>
%1 = memref.dim %arg0, %c1 : tensor<?x?x?xf32>
%2 = memref.dim %arg0, %c2 : tensor<?x?x?xf32>
%0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
%2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
%3 = linalg.init_tensor [%0, %1, %2] : tensor<?x?x?xf32>
%4, %5 = linalg.generic {
indexing_maps = [#map, #map, #map, #map],
@@ -600,8 +600,8 @@ func @keep_not_noop(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32> {
%c0 = constant 0 : index
%c1 = constant 1 : index
%cst = constant 1.000000e+00 : f32
%0 = memref.dim %arg0, %c0 : tensor<?x?xf32>
%1 = memref.dim %arg0, %c1 : tensor<?x?xf32>
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
%2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
br ^bb1(%cst : f32)

@@ -626,8 +626,8 @@ func @keep_not_noop(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>)
%c0 = constant 0 : index
%c1 = constant 1 : index
%cst = constant 1.000000e+00 : f32
%0 = memref.dim %arg0, %c0 : tensor<?x?xf32>
%1 = memref.dim %arg0, %c1 : tensor<?x?xf32>
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
%2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
br ^bb1(%cst : f32)

@@ -721,8 +721,8 @@ func @propogate_casts(%arg0 : tensor<?x?xf32>, %arg1 : f32, %arg2 : index,
%c42 = constant 42 : index
%0 = linalg.init_tensor [%c21, %c42] : tensor<?x?xf32>
%1 = linalg.fill(%arg1, %0) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
%2 = memref.dim %arg0, %c0 : tensor<?x?xf32>
%3 = memref.dim %arg0, %c1 : tensor<?x?xf32>
%2 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%3 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
%4 = tensor.insert_slice %arg0 into %1[%arg2, %arg3] [%2, %3] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
return %4 : tensor<?x?xf32>
}

@@ -93,11 +93,11 @@ func @cmpf(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<i1> {
// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<4x?x?x8x2x?xf32>
func @cmpf(%arg0: tensor<4x?x?x8x2x?xf32>, %arg1: tensor<4x?x?x8x2x?xf32>) -> tensor<4x?x?x8x2x?xi1> {
// CHECK: %[[C1:.*]] = constant 1 : index
// CHECK: %[[D1:.*]] = memref.dim %[[ARG0]], %[[C1]] : tensor<4x?x?x8x2x?xf32>
// CHECK: %[[D1:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor<4x?x?x8x2x?xf32>
// CHECK: %[[C2:.*]] = constant 2 : index
// CHECK: %[[D2:.*]] = memref.dim %[[ARG0]], %[[C2]] : tensor<4x?x?x8x2x?xf32>
// CHECK: %[[D2:.*]] = tensor.dim %[[ARG0]], %[[C2]] : tensor<4x?x?x8x2x?xf32>
// CHECK: %[[C5:.*]] = constant 5 : index
// CHECK: %[[D5:.*]] = memref.dim %[[ARG0]], %[[C5]] : tensor<4x?x?x8x2x?xf32>
// CHECK: %[[D5:.*]] = tensor.dim %[[ARG0]], %[[C5]] : tensor<4x?x?x8x2x?xf32>
// CHECK: %[[INIT:.*]] = linalg.init_tensor [4, %[[D1]], %[[D2]], 8, 2, %[[D5]]] : tensor<4x?x?x8x2x?xi1>
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]]

@@ -329,7 +329,7 @@ func @fold_slice(
func @unit_dim_for_reduction(%arg0: tensor<1x?x1x?xf32>) -> tensor<1x?xf32> {
%cst = constant 1.000000e+00 : f32
%c3 = constant 3 : index
%0 = memref.dim %arg0, %c3 : tensor<1x?x1x?xf32>
%0 = tensor.dim %arg0, %c3 : tensor<1x?x1x?xf32>
%1 = linalg.init_tensor [1, %0] : tensor<1x?xf32>
%2 = linalg.fill(%cst, %1) : f32, tensor<1x?xf32> -> tensor<1x?xf32>
%3 = linalg.generic {
@@ -398,7 +398,7 @@ func @unit_dim_for_reduction_keep_one(%arg0: tensor<1x?x1x1xf32>) -> tensor<1x1x
func @unit_dim_for_reduction_inner(%arg0: tensor<?x1x?x1xf32>) -> tensor<?x1xf32> {
%cst = constant 1.000000e+00 : f32
%c2 = constant 2 : index
%0 = memref.dim %arg0, %c2 : tensor<?x1x?x1xf32>
%0 = tensor.dim %arg0, %c2 : tensor<?x1x?x1xf32>
%1 = linalg.init_tensor [%0, 1] : tensor<?x1xf32>
%2 = linalg.fill(%cst, %1) : f32, tensor<?x1xf32> -> tensor<?x1xf32>
%3 = linalg.generic {

@@ -15,8 +15,8 @@ func @test_fusion_limit(
-> tensor<?x?xf32> {
%c0 = constant 0 : index
%c1 = constant 1 : index
%d0 = memref.dim %arg0, %c0 : tensor<?x?xf32>
%d1 = memref.dim %arg0, %c1 : tensor<?x?xf32>
%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%d1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
%init = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
%0 = linalg.generic #binary2Dpointwise
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)

@@ -150,8 +150,8 @@ module {
%c1 = constant 1 : index
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
%1 = memref.dim %0, %c0 : tensor<?x?xf32>
%2 = memref.dim %0, %c1 : tensor<?x?xf32>
%1 = tensor.dim %0, %c0 : tensor<?x?xf32>
%2 = tensor.dim %0, %c1 : tensor<?x?xf32>
%3 = linalg.init_tensor [%1, %2] : tensor<?x?xf32>
%4 = linalg.generic
{indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
@@ -223,26 +223,26 @@ module {
// CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
// CHECK-DAG: %[[C0:.+]] = constant 0 : index
// CHECK-DAG: %[[C1:.+]] = constant 1 : index
// CHECK: %[[M:.+]] = memref.dim %[[ARG0]], %c0 : tensor<?x?xf32>
// CHECK: %[[M:.+]] = tensor.dim %[[ARG0]], %c0 : tensor<?x?xf32>
// CHECK: %[[R0:.+]] = scf.for %[[IV0:[a-zA-Z0-9_]+]] =
// CHECK-SAME: iter_args(%[[ARG8:.+]] = %[[ARG6]]) -> (tensor<?x?xf32>) {
// CHECK: %[[M_1:.+]] = memref.dim %[[ARG8]], %[[C0]]
// CHECK: %[[M_1:.+]] = tensor.dim %[[ARG8]], %[[C0]]
// CHECK: %[[TILE_M_1:.+]] = affine.min #[[MAP0]](%[[M_1]], %[[IV0]])
// CHECK: %[[N3:.+]] = memref.dim %[[ARG8]], %[[C1]]
// CHECK: %[[N3:.+]] = tensor.dim %[[ARG8]], %[[C1]]
// CHECK: %[[STARG6:.+]] = tensor.extract_slice %[[ARG8]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_1]], %[[N3]]]
// CHECK: %[[M_2:.+]] = memref.dim %[[ARG4]], %[[C0]]
// CHECK: %[[M_2:.+]] = tensor.dim %[[ARG4]], %[[C0]]
// CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[M_2]], %[[M]]]
// CHECK: %[[N2:.+]] = memref.dim %[[ARG4]], %[[C1]]
// CHECK: %[[N2:.+]] = tensor.dim %[[ARG4]], %[[C1]]
// CHECK: %[[STARG4:.+]] = tensor.extract_slice %[[ARG4]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_2]], %[[N2]]]
// CHECK: %[[TILE_M_3:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[M]], %[[M]]]
// CHECK: %[[N0:.+]] = memref.dim %[[ARG0]], %[[C1]]
// CHECK: %[[N0:.+]] = tensor.dim %[[ARG0]], %[[C1]]
// CHECK: %[[STARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_3]], %[[N0]]]
// CHECK: %[[M_3:.+]] = memref.dim %[[ARG2]], %[[C0]]
// CHECK: %[[M_3:.+]] = tensor.dim %[[ARG2]], %[[C0]]
// CHECK: %[[TILE_M_4:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[M_3]], %[[M]]]
// CHECK: %[[N1:.+]] = memref.dim %[[ARG2]], %[[C1]]
// CHECK: %[[N1:.+]] = tensor.dim %[[ARG2]], %[[C1]]
// CHECK: %[[STARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_4]], %[[N1]]]
// CHECK: %[[T0:.+]] = linalg.matmul

@@ -31,30 +31,30 @@ module {
// CHECK-DAG: %[[C32:.+]] = constant 32 : index
// CHECK-DAG: %[[C64:.+]] = constant 64 : index
// CHECK-DAG: %[[C16:.+]] = constant 16 : index
// CHECK-DAG: %[[M:.+]] = memref.dim %[[ARG0]], %[[C0]]
// CHECK-DAG: %[[M:.+]] = tensor.dim %[[ARG0]], %[[C0]]
// CHECK: %[[RESULT:.+]] = scf.for %[[IV0:[a-zA-Z0-9]+]] =
// CHECK-SAME: %[[C0]] to %[[M]] step %[[C32]]
// CHECK-SAME: iter_args(%[[ARG6:.+]] = %[[ARG4]]) -> (tensor<?x?xf32>) {
// CHECK: %[[M_2:.+]] = memref.dim %[[ARG6]], %[[C0]]
// CHECK: %[[M_2:.+]] = tensor.dim %[[ARG6]], %[[C0]]
// CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP1]](%[[M_2]], %[[IV0]])
// CHECK: %[[N3:.+]] = memref.dim %[[ARG6]], %[[C1]]
// CHECK: %[[N3:.+]] = tensor.dim %[[ARG6]], %[[C1]]
// CHECK: %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_2]], %[[N3]]]
// CHECK: %[[TILE_M_3:.+]] = affine.min #[[MAP5]](%[[IV0]])[%[[M]], %[[M]]]
// CHECK: %[[N1:.+]] = memref.dim %[[ARG0]], %[[C1]]
// CHECK: %[[N1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
// CHECK: %[[ST_ARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_3]], %[[N1]]]
// CHECK: %[[M_3:.+]] = memref.dim %[[ARG2]], %[[C0]]
// CHECK: %[[M_3:.+]] = tensor.dim %[[ARG2]], %[[C0]]
// CHECK: %[[TILE_M_4:.+]] = affine.min #[[MAP5]](%[[IV0]])[%[[M_3]], %[[M]]]
// CHECK: %[[N2_2:.+]] = memref.dim %[[ARG2]], %[[C1]]
// CHECK: %[[N2_2:.+]] = tensor.dim %[[ARG2]], %[[C1]]
// CHECK: %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_4]], %[[N2_2]]]
// CHECK: %[[LHS:.+]] = linalg.matmul
// CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion_producer"
// CHECK-SAME: ins(%[[ST_ARG0]], %[[ARG1]] : tensor<?x?xf32>, tensor<?x?xf32>)
// CHECK-SAME: outs(%[[ST_ARG2]] : tensor<?x?xf32>)
// CHECK: %[[N2:.+]] = memref.dim %[[ARG1]], %[[C1]]
// CHECK: %[[N3_2:.+]] = memref.dim %[[ARG3]], %[[C1]]
// CHECK: %[[N2:.+]] = tensor.dim %[[ARG1]], %[[C1]]
// CHECK: %[[N3_2:.+]] = tensor.dim %[[ARG3]], %[[C1]]
// CHECK: %[[YIELD0:.+]] = scf.for %[[IV1:[a-zA-Z0-9]+]] =
// CHECK-SAME: %[[C0]] to %[[N3_2]] step %[[C64]]
// CHECK-SAME: iter_args(%[[ARG8:.+]] = %[[ST_ARG6]]) -> (tensor<?x?xf32>) {
@@ -64,13 +64,13 @@ module {
// CHECK: %[[TILE_N2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[N2]]]
// CHECK: %[[ST_LHS:.+]] = tensor.extract_slice %[[LHS]][0, %[[IV2]]]
// CHECK-SAME: [%[[TILE_M_3]], %[[TILE_N2]]]
// CHECK: %[[N2_3:.+]] = memref.dim %[[ARG3]], %[[C0]]
// CHECK: %[[N2_3:.+]] = tensor.dim %[[ARG3]], %[[C0]]
// CHECK: %[[TILE_N2_2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[N2_3]]]
// CHECK: %[[TILE_N3:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N3_2]]]
// CHECK: %[[ST_ARG3:.+]] = tensor.extract_slice %[[ARG3]][%[[IV2]], %[[IV1]]]
// CHECK-SAME: [%[[TILE_N2_2]], %[[TILE_N3]]]
// CHECK: %[[M_4:.+]] = memref.dim %[[ARG10]], %[[C0]]
// CHECK: %[[N3_3:.+]] = memref.dim %[[ARG10]], %[[C1]]
// CHECK: %[[M_4:.+]] = tensor.dim %[[ARG10]], %[[C0]]
// CHECK: %[[N3_3:.+]] = tensor.dim %[[ARG10]], %[[C1]]
// CHECK: %[[TILE_N3_2:.+]] = affine.min #[[MAP4]](%[[N3_3]], %[[IV1]])
// CHECK: %[[ST_ARG4:.+]] = tensor.extract_slice %[[ARG10]][0, %[[IV1]]]
// CHECK-SAME: [%[[M_4]], %[[TILE_N3_2]]]
@@ -104,7 +104,7 @@ module {
// TLOOP-DAG: %[[C0:.*]] = constant 0 : index
// TLOOP-DAG: %[[C1:.*]] = constant 1 : index

// TLOOP: %[[DIM_A0:.*]] = memref.dim %[[A]], %[[C0]] : [[TY:.*]]
// TLOOP: %[[DIM_A0:.*]] = tensor.dim %[[A]], %[[C0]] : [[TY:.*]]

// TLOOP: %[[ABC:.*]] = linalg.tiled_loop (%[[IV0:.*]]) = (%[[C0]])
// TLOOP-SAME: to (%[[DIM_A0]]) step (%[[C32]])
@@ -121,8 +121,8 @@ module {
// TLOOP: %[[AB_SUB:.*]] = linalg.matmul
// TLOOP-SAME: ins(%[[A_SUB]], %[[B_]] : {{.*}}) outs(%[[AB_INIT_SUB]]

// TLOOP: %[[DIM_B_1:.*]] = memref.dim %[[B_]], %[[C1]] : [[TY]]
// TLOOP: %[[DIM_C_1:.*]] = memref.dim %[[C_]], %[[C1]] : [[TY]]
// TLOOP: %[[DIM_B_1:.*]] = tensor.dim %[[B_]], %[[C1]] : [[TY]]
// TLOOP: %[[DIM_C_1:.*]] = tensor.dim %[[C_]], %[[C1]] : [[TY]]

// TLOOP: %[[ABC_SUB_:.*]] = linalg.tiled_loop (%[[IV1:.*]], %[[IV2:.*]]) =
// TLOOP-SAME: (%[[C0]], %[[C0]]) to (%[[DIM_C_1]], %[[DIM_B_1]])
@@ -156,12 +156,12 @@ module {
%arg2: tensor<?x?xf32>) -> tensor<?x?xf32>{
%c0 = constant 0 : index
%c1 = constant 1 : index
%0 = memref.dim %arg2, %c0 : tensor<?x?xf32>
%1 = memref.dim %arg2, %c1 : tensor<?x?xf32>
%0 = tensor.dim %arg2, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg2, %c1 : tensor<?x?xf32>
%2 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
%3 = memref.dim %2, %c0 : tensor<?x?xf32>
%4 = memref.dim %2, %c1 : tensor<?x?xf32>
%3 = tensor.dim %2, %c0 : tensor<?x?xf32>
%4 = tensor.dim %2, %c1 : tensor<?x?xf32>
%5 = linalg.init_tensor [%3, %4] : tensor<?x?xf32>
%6 = linalg.generic
{indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
@@ -213,8 +213,8 @@ module {
// TLOOP-DAG: %[[C0:.*]] = constant 0 : index
// TLOOP-DAG: %[[C1:.*]] = constant 1 : index

// TLOOP: %[[DIM_A_0:.*]] = memref.dim %[[A]], %[[C0]] : [[TY:.*]]
// TLOOP: %[[DIM_B_1:.*]] = memref.dim %[[B]], %[[C1]] : [[TY]]
// TLOOP: %[[DIM_A_0:.*]] = tensor.dim %[[A]], %[[C0]] : [[TY:.*]]
// TLOOP: %[[DIM_B_1:.*]] = tensor.dim %[[B]], %[[C1]] : [[TY]]

// TLOOP: %[[INIT:.*]] = linalg.init_tensor [%[[DIM_A_0]], %[[DIM_B_1]]]

@@ -289,8 +289,8 @@ module {
// TLOOP-DAG: %[[C0:.*]] = constant 0 : index
// TLOOP-DAG: %[[C1:.*]] = constant 1 : index

// TLOOP: %[[DIM_A_0:.*]] = memref.dim %[[A]], %[[C0]] : [[TY:.*]]
// TLOOP: %[[DIM_B_1:.*]] = memref.dim %[[B]], %[[C1]] : [[TY]]
// TLOOP: %[[DIM_A_0:.*]] = tensor.dim %[[A]], %[[C0]] : [[TY:.*]]
// TLOOP: %[[DIM_B_1:.*]] = tensor.dim %[[B]], %[[C1]] : [[TY]]

// TLOOP: %[[AB:.*]] = linalg.tiled_loop (%[[I:.*]], %[[J:.*]]) =
// TLOOP-SAME: (%[[C0]], %[[C0]]) to (%[[DIM_A_0]], %[[DIM_B_1]])
@@ -300,7 +300,7 @@ module {
// TLOOP-SAME: %[[C0_F32_:.*]] = %[[C0_F32]]
// TLOOP-SAME: outs (%[[OUT_:.*]] = %[[OUT]]: [[TY]]) {

// TLOOP: %[[DIM_A__1:.*]] = memref.dim %[[A_]], %[[C1]] : [[TY]]
// TLOOP: %[[DIM_A__1:.*]] = tensor.dim %[[A_]], %[[C1]] : [[TY]]
// TLOOP: %[[A_SUB:.*]] = tensor.extract_slice %[[A_]][%[[I]], 0]
// TLOOP: %[[B_SUB:.*]] = tensor.extract_slice %[[B_]][0, %[[J]]]
// TLOOP: %[[OUT_SUB:.*]] = tensor.extract_slice %[[OUT_]][%[[I]], %[[J]]]
@@ -360,8 +360,8 @@ module {
// TLOOP-DAG: %[[C0:.*]] = constant 0 : index
// TLOOP-DAG: %[[C1:.*]] = constant 1 : index

// TLOOP: %[[DIM_A_0:.*]] = memref.dim %[[A]], %[[C0]] : [[TY:.*]]
// TLOOP: %[[DIM_B_1:.*]] = memref.dim %[[B]], %[[C1]] : [[TY]]
// TLOOP: %[[DIM_A_0:.*]] = tensor.dim %[[A]], %[[C0]] : [[TY:.*]]
// TLOOP: %[[DIM_B_1:.*]] = tensor.dim %[[B]], %[[C1]] : [[TY]]

// TLOOP: %[[AB:.*]] = linalg.tiled_loop (%[[I:.*]], %[[J:.*]]) =
// TLOOP-SAME: (%[[C0]], %[[C0]]) to (%[[DIM_A_0]], %[[DIM_B_1]])
@@ -371,7 +371,7 @@ module {
// TLOOP-SAME: %[[C0_F32_:.*]] = %[[C0_F32]]
// TLOOP-SAME: outs (%[[OUT_:.*]] = %[[OUT]]: [[TY]]) {

// TLOOP: %[[DIM_A__1:.*]] = memref.dim %[[A_]], %[[C1]] : [[TY]]
// TLOOP: %[[DIM_A__1:.*]] = tensor.dim %[[A_]], %[[C1]] : [[TY]]
// TLOOP: %[[A_SUB:.*]] = tensor.extract_slice %[[A_]][%[[I]], 0]
// TLOOP: %[[B_SUB:.*]] = tensor.extract_slice %[[B_]][0, %[[J]]]
// TLOOP: %[[OUT_SUB:.*]] = tensor.extract_slice %[[OUT_]][%[[I]], %[[J]]]

@@ -8,8 +8,8 @@ func @add_mul_fusion(%arg0: tensor<?x?xf32>, %arg1 : tensor<?x?xf32>, %arg2 : te
{
%c0 = constant 0 : index
%c1 = constant 1 : index
%0 = memref.dim %arg0, %c0 : tensor<?x?xf32>
%1 = memref.dim %arg0, %c1 : tensor<?x?xf32>
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
%2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
%3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
@@ -50,8 +50,8 @@ func @scalar_add_mul_fusion(%arg0: tensor<?x?xf32>, %arg1 : f32, %arg2 : f32) ->
{
%c0 = constant 0 : index
%c1 = constant 1 : index
%0 = memref.dim %arg0, %c0 : tensor<?x?xf32>
%1 = memref.dim %arg0, %c1 : tensor<?x?xf32>
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
%2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
%3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, f32)
@@ -92,8 +92,8 @@ func @transpose_add_mul_fusion(%arg0: tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
{
%c0 = constant 0 : index
%c1 = constant 1 : index
%0 = memref.dim %arg0, %c0 : tensor<?x?xf32>
%1 = memref.dim %arg0, %c1 : tensor<?x?xf32>
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
%2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
%3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
@@ -126,8 +126,8 @@ func @add_transpose_mul_fusion(%arg0: tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
{
%c0 = constant 0 : index
%c1 = constant 1 : index
%0 = memref.dim %arg0, %c0 : tensor<?x?xf32>
%1 = memref.dim %arg0, %c1 : tensor<?x?xf32>
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
%2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
%3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
@@ -161,7 +161,7 @@ func @add_broadcast_mul_fusion(%arg0: tensor<?xf32>, %arg1 : tensor<?xf32>, %arg
{
%c0 = constant 0 : index
%c1 = constant 1 : index
%0 = memref.dim %arg0, %c0 : tensor<?xf32>
%0 = tensor.dim %arg0, %c0 : tensor<?xf32>
%1 = linalg.init_tensor [%0] : tensor<?xf32>
%2 = linalg.generic {indexing_maps = [#map2, #map2, #map2], iterator_types = ["parallel"]}
ins(%arg0, %arg1 : tensor<?xf32>, tensor<?xf32>)
@@ -172,7 +172,7 @@ func @add_broadcast_mul_fusion(%arg0: tensor<?xf32>, %arg1 : tensor<?xf32>, %arg
} -> tensor<?xf32>
// CHECK: linalg.generic {
// CHECK-SAME: indexing_maps = {{\[}}[[$MAP1]], [[$MAP1]], [[$MAP0]], [[$MAP0]]
%3 = memref.dim %arg2, %c1 : tensor<?x?xf32>
%3 = tensor.dim %arg2, %c1 : tensor<?x?xf32>
%4 = linalg.init_tensor [%0, %3] : tensor<?x?xf32>
%5 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]}
ins(%2, %arg2 : tensor<?xf32>, tensor<?x?xf32>)
@@ -224,8 +224,8 @@ func @generic_op_constant_fusion(%arg0 : tensor<5x?x?xf32>) -> tensor<5x?x?xf32>
%c1 = constant 1 : index
%c2 = constant 2 : index
%cst = constant dense<42.0> : tensor<5xf32>
%0 = memref.dim %arg0, %c1 : tensor<5x?x?xf32>
%1 = memref.dim %arg0, %c2 : tensor<5x?x?xf32>
%0 = tensor.dim %arg0, %c1 : tensor<5x?x?xf32>
%1 = tensor.dim %arg0, %c2 : tensor<5x?x?xf32>
%2 = linalg.init_tensor [5, %0, %1] : tensor<5x?x?xf32>
%3 = linalg.generic {
indexing_maps = [#map0, #map1, #map1],
@@ -256,8 +256,8 @@ func @generic_op_zero_dim_constant_fusion(%arg0 : tensor<5x?x?xf32>)
%c1 = constant 1 : index
%c2 = constant 2 : index
%cst = constant dense<42.0> : tensor<f32>
%0 = memref.dim %arg0, %c1 : tensor<5x?x?xf32>
%1 = memref.dim %arg0, %c2 : tensor<5x?x?xf32>
%0 = tensor.dim %arg0, %c1 : tensor<5x?x?xf32>
%1 = tensor.dim %arg0, %c2 : tensor<5x?x?xf32>
%2 = linalg.init_tensor [5, %0, %1] : tensor<5x?x?xf32>
%3 = linalg.generic {
indexing_maps = [#map0, #map1, #map1],
@@ -284,8 +284,8 @@ func @producer_indexed_consumer_fusion(%arg0: tensor<?x?xi32>,
%arg1: tensor<?x?xi32>) -> tensor<?x?xi32> {
%c0 = constant 0 : index
%c1 = constant 1 : index
%0 = memref.dim %arg0, %c0 : tensor<?x?xi32>
%1 = memref.dim %arg0, %c1 : tensor<?x?xi32>
%0 = tensor.dim %arg0, %c0 : tensor<?x?xi32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xi32>
%2 = linalg.init_tensor [%0, %1] : tensor<?x?xi32>
%3 = linalg.generic {
indexing_maps = [#map0, #map0, #map0],
@@ -335,8 +335,8 @@ func @producer_indexed_consumer_fusion(%arg0: tensor<?x?xi32>,
func @indexed_producer_consumer_fusion(%arg0: tensor<?x?xi32>) -> tensor<?x?xi32> {
%c0 = constant 0 : index
%c1 = constant 1 : index
%0 = memref.dim %arg0, %c0 : tensor<?x?xi32>
%1 = memref.dim %arg0, %c1 : tensor<?x?xi32>
%0 = tensor.dim %arg0, %c0 : tensor<?x?xi32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xi32>
%2 = linalg.init_tensor [%0, %1] : tensor<?x?xi32>
%3 = linalg.generic {
indexing_maps = [#map0, #map0],
@@ -389,8 +389,8 @@ func @indexed_producer_indexed_consumer_fusion(%arg0: tensor<?x?xi32>)
-> tensor<?x?xi32> {
%c0 = constant 0 : index
%c1 = constant 1 : index
%0 = memref.dim %arg0, %c0 : tensor<?x?xi32>
%1 = memref.dim %arg0, %c1 : tensor<?x?xi32>
%0 = tensor.dim %arg0, %c0 : tensor<?x?xi32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xi32>
%2 = linalg.init_tensor [%0, %1] : tensor<?x?xi32>
%3 = linalg.generic {
indexing_maps = [#map0, #map0],
@@ -452,7 +452,7 @@ func @one_dim_indexed_producer_consumer_fusion(%arg0 : tensor<?xi32>,
%arg1 : tensor<?x?xi32>) -> tensor<?x?xi32> {
%c0 = constant 0 : index
%c1 = constant 1 : index
%d0 = memref.dim %arg0, %c0 : tensor<?xi32>
%d0 = tensor.dim %arg0, %c0 : tensor<?xi32>
%0 = linalg.init_tensor [%d0] : tensor<?xi32>
%1 = linalg.generic
{indexing_maps = [#map1, #map1],
@@ -464,8 +464,8 @@ func @one_dim_indexed_producer_consumer_fusion(%arg0 : tensor<?xi32>,
%4 = addi %arg2, %3 : i32
linalg.yield %4 : i32
} -> tensor<?xi32>
%2 = memref.dim %arg1, %c0 : tensor<?x?xi32>
%3 = memref.dim %arg1, %c1 : tensor<?x?xi32>
%2 = tensor.dim %arg1, %c0 : tensor<?x?xi32>
%3 = tensor.dim %arg1, %c1 : tensor<?x?xi32>
%4 = linalg.init_tensor [%2, %3] : tensor<?x?xi32>
%5 = linalg.generic
{indexing_maps = [#map2, #map3, #map2],
@@ -629,7 +629,7 @@ func @sigmoid_dynamic_dim(%0: tensor<?x1xf32>) -> tensor<?x1xf32> {
^bb0(%a: f32): // no predecessors
linalg.yield %cp5 : f32
} -> tensor<?x1xf32>
%d0 = memref.dim %0, %c0 : tensor<?x1xf32>
%d0 = tensor.dim %0, %c0 : tensor<?x1xf32>
%init1 = linalg.init_tensor [%d0, 1] : tensor<?x1xf32>
%2 = linalg.generic {indexing_maps = [
affine_map<(d0, d1) -> (d0, d1)>,
@@ -730,13 +730,13 @@ func @break_outs_dependency(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xf32>)
// CHECK-DAG: %[[C0:.+]] = constant 0 : index
// CHECK-DAG: %[[C1:.+]] = constant 1 : index
// CHECK-DAG: %[[D0:.+]] = memref.dim %[[ARG0]], %[[C0]]
// CHECK-DAG: %[[D1:.+]] = memref.dim %[[ARG0]], %[[C1]]
// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
// CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
// CHECK: %[[GENERIC1:.+]] = linalg.generic
// CHECK-SAME: outs(%[[INIT]] : tensor<?x?xf32>)
// CHECK-DAG: %[[D0:.+]] = memref.dim %[[GENERIC1]], %[[C0]]
// CHECK-DAG: %[[D1:.+]] = memref.dim %[[GENERIC1]], %[[C1]]
// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[GENERIC1]], %[[C0]]
// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[GENERIC1]], %[[C1]]
// CHECK-DAG: %[[INIT:.+]] = linalg.init_tensor [%[[D0]], %[[D1]]]
// CHECK: %[[RESULT:.+]] = linalg.generic
// CHECK-SAME: outs(%[[INIT]] : tensor<?x?xf32>)

@@ -38,12 +38,12 @@ func @matmul_tensors(
%c0 = constant 0 : index
%c1 = constant 1 : index

// CHECK-DAG: %[[dM:.*]] = memref.dim %[[TA]], %[[C0]] : tensor<?x?xf32>
// CHECK-DAG: %[[dK:.*]] = memref.dim %[[TA]], %[[C1]] : tensor<?x?xf32>
// CHECK-DAG: %[[dN:.*]] = memref.dim %[[TB]], %[[C1]] : tensor<?x?xf32>
%0 = memref.dim %arg0, %c0 : tensor<?x?xf32>
%1 = memref.dim %arg0, %c1 : tensor<?x?xf32>
%2 = memref.dim %arg1, %c1 : tensor<?x?xf32>
// CHECK-DAG: %[[dM:.*]] = tensor.dim %[[TA]], %[[C0]] : tensor<?x?xf32>
// CHECK-DAG: %[[dK:.*]] = tensor.dim %[[TA]], %[[C1]] : tensor<?x?xf32>
// CHECK-DAG: %[[dN:.*]] = tensor.dim %[[TB]], %[[C1]] : tensor<?x?xf32>
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
%2 = tensor.dim %arg1, %c1 : tensor<?x?xf32>

// CHECK: scf.for %[[I:[0-9a-z]+]] =
// First padded tensor is MxKx2x4 under loop M so Kx2x4
@@ -94,19 +94,19 @@ func @matmul_tensors(
%3 = scf.for %arg3 = %c0 to %0 step %c2 iter_args(%arg4 = %arg2) -> (tensor<?x?xf32>) {
%4 = scf.for %arg5 = %c0 to %2 step %c3 iter_args(%arg6 = %arg4) -> (tensor<?x?xf32>) {
%5 = scf.for %arg7 = %c0 to %1 step %c4 iter_args(%arg8 = %arg6) -> (tensor<?x?xf32>) {
%6 = memref.dim %arg0, %c0 : tensor<?x?xf32>
%6 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%7 = affine.min #map0(%arg3)[%6]
%8 = memref.dim %arg0, %c1 : tensor<?x?xf32>
%8 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
%9 = affine.min #map1(%arg7)[%8]
%10 = tensor.extract_slice %arg0[%arg3, %arg7] [%7, %9] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%11 = memref.dim %arg1, %c0 : tensor<?x?xf32>
%11 = tensor.dim %arg1, %c0 : tensor<?x?xf32>
%12 = affine.min #map1(%arg7)[%11]
%13 = memref.dim %arg1, %c1 : tensor<?x?xf32>
%13 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
%14 = affine.min #map2(%arg5)[%13]
%15 = tensor.extract_slice %arg1[%arg7, %arg5] [%12, %14] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%16 = memref.dim %arg8, %c0 : tensor<?x?xf32>
%16 = tensor.dim %arg8, %c0 : tensor<?x?xf32>
%17 = affine.min #map3(%16, %arg3)
%18 = memref.dim %arg8, %c1 : tensor<?x?xf32>
%18 = tensor.dim %arg8, %c1 : tensor<?x?xf32>
%19 = affine.min #map4(%18, %arg5)
%20 = tensor.extract_slice %arg8[%arg3, %arg5] [%17, %19] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%21 = subi %c2, %7 : index
@@ -159,9 +159,9 @@ func @dot(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %arg2: tensor<f32>)
%cst = constant 0.000000e+00 : f32
%c2 = constant 2 : index
%c0 = constant 0 : index
%1 = memref.dim %arg0, %c0 : tensor<?xf32>
%2 = memref.dim %arg0, %c0 : tensor<?xf32>
%3 = memref.dim %arg1, %c0 : tensor<?xf32>
%1 = tensor.dim %arg0, %c0 : tensor<?xf32>
%2 = tensor.dim %arg0, %c0 : tensor<?xf32>
%3 = tensor.dim %arg1, %c0 : tensor<?xf32>

// CHECK: scf.for %[[I:[0-9a-z]+]] =
//

@@ -478,7 +478,7 @@ func @unit_dim_reshape_expansion_full
%c1 = constant 1 : index
%0 = linalg.tensor_collapse_shape %arg0 [[0, 1, 2], [3, 4], [5]]
: tensor<1x?x1x2x1x4xf32> into tensor<?x2x4xf32>
%1 = memref.dim %arg0, %c1 : tensor<1x?x1x2x1x4xf32>
%1 = tensor.dim %arg0, %c1 : tensor<1x?x1x2x1x4xf32>
%2 = linalg.init_tensor [%1, 2, 4] : tensor<?x2x4xf32>
%3 = linalg.generic
{indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,

@@ -5,8 +5,8 @@ func @init_tensor_static_dim() -> (index, index) {
%c2 = constant 2 : index
%c6 = constant 6 : index
%0 = linalg.init_tensor [4, 5, %c6] : tensor<4x5x?xf32>
%1 = memref.dim %0, %c2 : tensor<4x5x?xf32>
%2 = memref.dim %0, %c0 : tensor<4x5x?xf32>
%1 = tensor.dim %0, %c2 : tensor<4x5x?xf32>
%2 = tensor.dim %0, %c0 : tensor<4x5x?xf32>
return %1, %2 : index, index
}
// CHECK: func @init_tensor_static_dim
@@ -19,7 +19,7 @@ func @init_tensor_static_dim() -> (index, index) {
func @init_tensor_dynamic_dim(%arg0 : index) -> (index) {
%c2 = constant 2 : index
%0 = linalg.init_tensor [4, 5, %arg0] : tensor<4x5x?xf32>
%1 = memref.dim %0, %c2 : tensor<4x5x?xf32>
%1 = tensor.dim %0, %c2 : tensor<4x5x?xf32>
return %1 : index
}
// CHECK: func @init_tensor_dynamic_dim
@@ -32,8 +32,8 @@ func @init_tensor_dynamic_dim2(%arg0 : index, %arg1 : index) -> (index, index) {
%c0 = constant 0 : index
%c1 = constant 1 : index
%0 = linalg.init_tensor [%arg0, %arg1] : tensor<?x?xf32>
%1 = memref.dim %0, %c0 : tensor<?x?xf32>
%2 = memref.dim %0, %c1 : tensor<?x?xf32>
%1 = tensor.dim %0, %c0 : tensor<?x?xf32>
%2 = tensor.dim %0, %c1 : tensor<?x?xf32>
return %1, %2 : index, index
}
// CHECK: func @init_tensor_dynamic_dim2
@@ -60,8 +60,8 @@ func @remove_dim_result_uses
%2 = addf %1, %arg5 : f32
linalg.yield %2 : f32
} -> tensor<?x?xf32>
%3 = memref.dim %0, %c0 : tensor<?x?xf32>
%4 = memref.dim %0, %c1 : tensor<?x?xf32>
%3 = tensor.dim %0, %c0 : tensor<?x?xf32>
%4 = tensor.dim %0, %c1 : tensor<?x?xf32>
return %3, %4 : index, index
}
// CHECK: #[[MAP0:.+]] = affine_map<()[s0, s1] -> (s0 + s1)>
@@ -72,11 +72,11 @@ func @remove_dim_result_uses
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
// CHECK-DAG: %[[C0:.+]] = constant 0 : index
// CHECK-DAG: %[[C1:.+]] = constant 1 : index
// CHECK-DAG: %[[T0:.+]] = memref.dim %[[ARG0]], %[[C0]]
// CHECK-DAG: %[[T1:.+]] = memref.dim %[[ARG1]], %[[C1]]
// CHECK-DAG: %[[T0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
// CHECK-DAG: %[[T1:.+]] = tensor.dim %[[ARG1]], %[[C1]]
// CHECK: %[[T2:.+]] = affine.apply #[[MAP0]]()[%[[T0]], %[[T1]]]
// CHECK-DAG: %[[T3:.+]] = memref.dim %[[ARG0]], %[[C0]]
// CHECK-DAG: %[[T4:.+]] = memref.dim %[[ARG1]], %[[C1]]
// CHECK-DAG: %[[T3:.+]] = tensor.dim %[[ARG0]], %[[C0]]
// CHECK-DAG: %[[T4:.+]] = tensor.dim %[[ARG1]], %[[C1]]
// CHECK: %[[T5:.+]] = affine.apply #[[MAP1]]()[%[[T3]], %[[T4]]]
// CHECK: return %[[T2]], %[[T5]]

@@ -86,7 +86,7 @@ func @remove_dim_result_uses_outs
(%arg0 : tensor<?xf32>, %arg1 : index) -> (index) {
%c0 = constant 0 : index
%c1 = constant 1 : index
%d0 = memref.dim %arg0, %c0 : tensor<?xf32>
%d0 = tensor.dim %arg0, %c0 : tensor<?xf32>
%0 = linalg.init_tensor [%d0, %arg1] : tensor<?x?xf32>
%1 = linalg.generic
{indexing_maps = [affine_map<(d0, d1) -> (d0)>,
@@ -96,7 +96,7 @@ func @remove_dim_result_uses_outs
^bb0(%arg2: f32, %arg3: f32) :
linalg.yield %arg2 : f32
} -> tensor<?x?xf32>
%2 = memref.dim %1, %c1 : tensor<?x?xf32>
%2 = tensor.dim %1, %c1 : tensor<?x?xf32>
return %2 : index
}
// CHECK: func @remove_dim_result_uses_outs
@@ -112,8 +112,8 @@ func @remove_dim_result_uses_sequence
%c1 = constant 1 : index
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
%1 = memref.dim %0, %c0 : tensor<?x?xf32>
%2 = memref.dim %0, %c1 : tensor<?x?xf32>
%1 = tensor.dim %0, %c0 : tensor<?x?xf32>
%2 = tensor.dim %0, %c1 : tensor<?x?xf32>
%3 = linalg.generic
{indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0)>,
affine_map<(d0, d1, d2) -> (d0, d2)>,
@@ -126,8 +126,8 @@ func @remove_dim_result_uses_sequence
%5 = addf %4, %arg5 : f32
linalg.yield %5 : f32
} -> tensor<?x?xf32>
%6 = memref.dim %3, %c0 : tensor<?x?xf32>
%7 = memref.dim %3, %c1 : tensor<?x?xf32>
%6 = tensor.dim %3, %c0 : tensor<?x?xf32>
%7 = tensor.dim %3, %c1 : tensor<?x?xf32>
return %1, %2, %6, %7 : index, index, index, index
}
// CHECK-LABEL: func @remove_dim_result_uses_sequence
@@ -136,10 +136,10 @@ func @remove_dim_result_uses_sequence
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
// CHECK-DAG: %[[C0:.+]] = constant 0 : index
// CHECK-DAG: %[[C1:.+]] = constant 1 : index
// CHECK-DAG: %[[T0:.+]] = memref.dim %[[ARG0]], %[[C0]]
// CHECK-DAG: %[[T1:.+]] = memref.dim %[[ARG1]], %[[C1]]
// CHECK-DAG: %[[T2:.+]] = memref.dim %[[ARG0]], %[[C1]]
// CHECK-DAG: %[[T3:.+]] = memref.dim %[[ARG1]], %[[C1]]
// CHECK-DAG: %[[T0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
// CHECK-DAG: %[[T1:.+]] = tensor.dim %[[ARG1]], %[[C1]]
// CHECK-DAG: %[[T2:.+]] = tensor.dim %[[ARG0]], %[[C1]]
// CHECK-DAG: %[[T3:.+]] = tensor.dim %[[ARG1]], %[[C1]]
// CHECK: return %[[T0]], %[[T1]], %[[T2]], %[[T3]]

// -----

@@ -148,7 +148,7 @@ func @keep_result_dim_uses_sequence2
(%arg0 : tensor<?xf32>, %arg1 : index) -> (index, index) {
%c0 = constant 0 : index
%c1 = constant 1 : index
%d0 = memref.dim %arg0, %c0 : tensor<?xf32>
%d0 = tensor.dim %arg0, %c0 : tensor<?xf32>
%0 = linalg.init_tensor [%d0, %arg1] : tensor<?x?xf32>
%1 = linalg.generic
{indexing_maps = [affine_map<(d0, d1) -> (d0)>,
@@ -158,15 +158,15 @@ func @keep_result_dim_uses_sequence2
^bb0(%arg2: f32, %arg3 : f32):
linalg.yield %arg2 : f32
} -> tensor<?x?xf32>
%2 = memref.dim %1, %c0 : tensor<?x?xf32>
%3 = memref.dim %1, %c1 : tensor<?x?xf32>
%2 = tensor.dim %1, %c0 : tensor<?x?xf32>
%3 = tensor.dim %1, %c1 : tensor<?x?xf32>
return %2, %3 : index, index
}
// CHECK: func @keep_result_dim_uses_sequence2
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index
// CHECK-DAG: %[[C0:.+]] = constant 0 : index
// CHECK-DAG: %[[T0:.+]] = memref.dim %[[ARG0]], %[[C0]]
// CHECK-DAG: %[[T0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
// CHECK: return %[[T0]], %[[ARG1]]

// -----
@@ -185,16 +185,16 @@ func @init_tensor_dim_of_linalg_result(%arg_0 : tensor<?xf32>,
} -> (tensor<?xf32>, tensor<?xf32>)

%c0 = constant 0 : index
%num_elem_0 = memref.dim %0, %c0 : tensor<?xf32>
%num_elem_0 = tensor.dim %0, %c0 : tensor<?xf32>

%num_elem_1 = memref.dim %1, %c0 : tensor<?xf32>
%num_elem_1 = tensor.dim %1, %c0 : tensor<?xf32>
return %num_elem_0, %num_elem_1 : index, index
}
// CHECK: func @init_tensor_dim_of_linalg_result(
// CHECK-SAME: %[[ARG_0:[a-zA-Z0-9_]+]]: tensor<?xf32>
// CHECK-SAME: %[[ARG_1:[a-zA-Z0-9_]+]]: tensor<?xf32>)
// CHECK: %[[R0:.+]] = memref.dim %[[ARG_0]]
// CHECK: %[[R1:.+]] = memref.dim %[[ARG_0]]
// CHECK: %[[R0:.+]] = tensor.dim %[[ARG_0]]
// CHECK: %[[R1:.+]] = tensor.dim %[[ARG_0]]
// CHECK: return %[[R0]], %[[R1]]

// -----
@@ -206,9 +206,9 @@ func @dim_reshape_expansion(%arg0 : tensor<6x5x?xf32>) -> (index, index, index)
%c4 = constant 4 : index
%0 = linalg.tensor_expand_shape %arg0 [[0, 1], [2], [3, 4, 5]]
: tensor<6x5x?xf32> into tensor<2x3x5x4x?x7xf32>
%1 = memref.dim %0, %c1 : tensor<2x3x5x4x?x7xf32>
%2 = memref.dim %0, %c3 : tensor<2x3x5x4x?x7xf32>
%3 = memref.dim %0, %c4 : tensor<2x3x5x4x?x7xf32>
%1 = tensor.dim %0, %c1 : tensor<2x3x5x4x?x7xf32>
%2 = tensor.dim %0, %c3 : tensor<2x3x5x4x?x7xf32>
%3 = tensor.dim %0, %c4 : tensor<2x3x5x4x?x7xf32>
return %1, %2, %3 : index, index, index
}
// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 floordiv 28)>
@@ -217,7 +217,7 @@ func @dim_reshape_expansion(%arg0 : tensor<6x5x?xf32>) -> (index, index, index)
// CHECK-DAG: %[[C2:.+]] = constant 2 : index
// CHECK-DAG: %[[C3:.+]] = constant 3 : index
// CHECK-DAG: %[[C4:.+]] = constant 4 : index
// CHECK: %[[D0:.+]] = memref.dim %[[ARG0]], %[[C2]]
// CHECK: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C2]]
// CHECK: %[[D1:.+]] = affine.apply #[[MAP]]()[%[[D0]]]
// CHECK: return %[[C3]], %[[C4]], %[[D1]]

@@ -229,8 +229,8 @@ func @dim_reshape_collapse(%arg0 : tensor<2x3x5x4x?x7xf32>) -> (index, index)
%c2 = constant 2 : index
%0 = linalg.tensor_collapse_shape %arg0 [[0, 1], [2], [3, 4, 5]]
: tensor<2x3x5x4x?x7xf32> into tensor<6x5x?xf32>
%1 = memref.dim %0, %c1 : tensor<6x5x?xf32>
%2 = memref.dim %0, %c2 : tensor<6x5x?xf32>
%1 = tensor.dim %0, %c1 : tensor<6x5x?xf32>
%2 = tensor.dim %0, %c2 : tensor<6x5x?xf32>
return %1, %2 : index, index
}
// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 * 28)>
@@ -238,7 +238,7 @@ func @dim_reshape_collapse(%arg0 : tensor<2x3x5x4x?x7xf32>) -> (index, index)
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<2x3x5x4x?x7xf32>
// CHECK-DAG: %[[C4:.+]] = constant 4 : index
// CHECK-DAG: %[[C5:.+]] = constant 5 : index
// CHECK: %[[D0:.+]] = memref.dim %[[ARG0]], %[[C4]]
// CHECK: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C4]]
// CHECK: %[[D1:.+]] = affine.apply #[[MAP]]()[%[[D0]]]
// CHECK: return %[[C5]], %[[D1]]

@@ -257,9 +257,9 @@ func @dim_of_pad_op(%arg0 : tensor<2x?x?xf32>, %arg1 : index, %arg2 : index,
^bb0(%arg4: index, %arg5: index, %arg6: index):
linalg.yield %arg3 : f32
} : tensor<2x?x?xf32> to tensor<?x?x?xf32>
%1 = memref.dim %0, %c0 : tensor<?x?x?xf32>
%2 = memref.dim %0, %c1 : tensor<?x?x?xf32>
%3 = memref.dim %0, %c2 : tensor<?x?x?xf32>
%1 = tensor.dim %0, %c0 : tensor<?x?x?xf32>
%2 = tensor.dim %0, %c1 : tensor<?x?x?xf32>
%3 = tensor.dim %0, %c2 : tensor<?x?x?xf32>
return %1, %2, %3 : index, index, index
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> (s1 + s0 + 5)>
@@ -271,8 +271,8 @@ func @dim_of_pad_op(%arg0 : tensor<2x?x?xf32>, %arg1 : index, %arg2 : index,
// CHECK-DAG: %[[C1:.+]] = constant 1 : index
// CHECK-DAG: %[[C2:.+]] = constant 2 : index
// CHECK-DAG: %[[C12:.+]] = constant 12 : index
// CHECK: %[[IN_DIM1:.+]] = memref.dim %[[ARG0]], %[[C1]]
// CHECK: %[[IN_DIM1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
// CHECK: %[[OUT_DIM1:.+]] = affine.apply #[[MAP0]]()[%[[ARG1]], %[[IN_DIM1]]]
// CHECK: %[[IN_DIM2:.+]] = memref.dim %[[ARG0]], %[[C2]]
// CHECK: %[[IN_DIM2:.+]] = tensor.dim %[[ARG0]], %[[C2]]
// CHECK: %[[OUT_DIM2:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[IN_DIM2]]]
// CHECK: return %[[C12]], %[[OUT_DIM1]], %[[OUT_DIM2]]

@@ -781,9 +781,9 @@ func @tiled_loop_reduction(%input_3d: tensor<16x24x32xf32>,
|
||||
%c2 = constant 2 : index
|
||||
%c4 = constant 4 : index
|
||||
%c8 = constant 8 : index
|
||||
%X = memref.dim %input_3d, %c0 : tensor<16x24x32xf32>
|
||||
%Y = memref.dim %input_3d, %c1 : tensor<16x24x32xf32>
|
||||
%Z = memref.dim %input_3d, %c2 : tensor<16x24x32xf32>
|
||||
%X = tensor.dim %input_3d, %c0 : tensor<16x24x32xf32>
|
||||
%Y = tensor.dim %input_3d, %c1 : tensor<16x24x32xf32>
|
||||
%Z = tensor.dim %input_3d, %c2 : tensor<16x24x32xf32>
|
||||
%result = linalg.tiled_loop (%i, %j, %k)
|
||||
= (%c0, %c0, %c0) to (%X, %Y, %Z) step (%c2, %c4, %c8)
|
||||
ins(%i3d_ = %input_3d: tensor<16x24x32xf32>,
|
||||
|
||||
@@ -133,7 +133,7 @@ func @static_mixed_data_low_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
|
||||
// CHECK-SAME: %[[ARG0:.*]]: tensor<?x5xf32>
|
||||
// CHECK-NOT: linalg.pad_tensor
|
||||
// CHECK: %[[C0:.*]] = constant 0 : index
|
||||
// CHECK: memref.dim %[[ARG0]], %[[C0]]
|
||||
// CHECK: tensor.dim %[[ARG0]], %[[C0]]
|
||||
// CHECK: %[[RESULT:.*]] = scf.if %{{.*}} -> (tensor<3x4xf32>) {
|
||||
// CHECK: %[[GEN:.*]] = tensor.generate
|
||||
// CHECK: scf.yield %[[GEN]]
|
||||
|
@@ -10,9 +10,9 @@ func @matmul_tensors(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tens
%c0 = constant 0 : index
%c3 = constant 3 : index
%c1 = constant 1 : index
%0 = memref.dim %t0, %c0 : tensor<?x?xf32>
%1 = memref.dim %t0, %c1 : tensor<?x?xf32>
%2 = memref.dim %arg1, %c1 : tensor<?x?xf32>
%0 = tensor.dim %t0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %t0, %c1 : tensor<?x?xf32>
%2 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
%3 = scf.for %arg3 = %c0 to %0 step %c2 iter_args(%arg4 = %arg2) -> (tensor<?x?xf32>) {
%4 = scf.for %arg5 = %c0 to %2 step %c3 iter_args(%arg6 = %arg4) -> (tensor<?x?xf32>) {
%5 = scf.for %arg7 = %c0 to %1 step %c4 iter_args(%arg8 = %arg6) -> (tensor<?x?xf32>) {
@@ -40,12 +40,12 @@ func @matmul_tensors(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tens

// CHECK-DAG: %[[C0:.*]] = constant 0 : index
// CHECK-DAG: %[[C1:.*]] = constant 1 : index
// CHECK-DAG: %[[dA0:.*]] = memref.dim %[[A]], %[[C0]] : tensor<?x?xf32>
// CHECK-DAG: %[[dA1:.*]] = memref.dim %[[A]], %[[C1]] : tensor<?x?xf32>
// CHECK-DAG: %[[dB0:.*]] = memref.dim %[[B]], %[[C0]] : tensor<?x?xf32>
// CHECK-DAG: %[[dB1:.*]] = memref.dim %[[B]], %[[C1]] : tensor<?x?xf32>
// CHECK-DAG: %[[dC0:.*]] = memref.dim %[[C]], %[[C0]] : tensor<?x?xf32>
// CHECK-DAG: %[[dC1:.*]] = memref.dim %[[C]], %[[C1]] : tensor<?x?xf32>
// CHECK-DAG: %[[dA0:.*]] = tensor.dim %[[A]], %[[C0]] : tensor<?x?xf32>
// CHECK-DAG: %[[dA1:.*]] = tensor.dim %[[A]], %[[C1]] : tensor<?x?xf32>
// CHECK-DAG: %[[dB0:.*]] = tensor.dim %[[B]], %[[C0]] : tensor<?x?xf32>
// CHECK-DAG: %[[dB1:.*]] = tensor.dim %[[B]], %[[C1]] : tensor<?x?xf32>
// CHECK-DAG: %[[dC0:.*]] = tensor.dim %[[C]], %[[C0]] : tensor<?x?xf32>
// CHECK-DAG: %[[dC1:.*]] = tensor.dim %[[C]], %[[C1]] : tensor<?x?xf32>
// CHECK: scf.for %[[I:[0-9a-z]*]]
// CHECK: %[[sizeA0:.*]] = affine.min #[[BOUND2_MAP]](%[[I]])[%[[dA0]]]
// CHECK: %[[stA:.*]] = tensor.extract_slice %[[A]][%[[I]], 0] [%[[sizeA0]], %[[dA1]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
@@ -153,10 +153,10 @@ func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?x
%c8 = constant 8 : index
%c16 = constant 16 : index

%n = memref.dim %elementwise, %c0 : tensor<?x?x?x?xf32>
%oh = memref.dim %elementwise, %c1 : tensor<?x?x?x?xf32>
%ow = memref.dim %elementwise, %c2 : tensor<?x?x?x?xf32>
%oc = memref.dim %elementwise, %c3 : tensor<?x?x?x?xf32>
%n = tensor.dim %elementwise, %c0 : tensor<?x?x?x?xf32>
%oh = tensor.dim %elementwise, %c1 : tensor<?x?x?x?xf32>
%ow = tensor.dim %elementwise, %c2 : tensor<?x?x?x?xf32>
%oc = tensor.dim %elementwise, %c3 : tensor<?x?x?x?xf32>

%init = linalg.init_tensor [%n, %oh, %ow, %oc] : tensor<?x?x?x?xf32>
%fill = linalg.fill(%cst, %init) : f32, tensor<?x?x?x?xf32> -> tensor<?x?x?x?xf32>
@@ -222,26 +222,26 @@ func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?x
// CHECK-DAG: %[[C2:.+]] = constant 2 : index
// CHECK-DAG: %[[C3:.+]] = constant 3 : index

// CHECK-DAG: %[[ELEM_N:.+]] = memref.dim %[[ELEM]], %[[C0]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[ELEM_OH:.+]] = memref.dim %[[ELEM]], %[[C1]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[ELEM_OW:.+]] = memref.dim %[[ELEM]], %[[C2]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[ELEM_OC:.+]] = memref.dim %[[ELEM]], %[[C3]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[ELEM_N:.+]] = tensor.dim %[[ELEM]], %[[C0]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[ELEM_OH:.+]] = tensor.dim %[[ELEM]], %[[C1]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[ELEM_OW:.+]] = tensor.dim %[[ELEM]], %[[C2]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[ELEM_OC:.+]] = tensor.dim %[[ELEM]], %[[C3]] : tensor<?x?x?x?xf32>

// CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[ELEM_N]], %[[ELEM_OH]], %[[ELEM_OW]], %[[ELEM_OC]]] : tensor<?x?x?x?xf32>
// CHECK: %[[FILL:.+]] = linalg.fill(%cst, %[[INIT]]) : f32, tensor<?x?x?x?xf32> -> tensor<?x?x?x?xf32>

// CHECK-DAG: %[[FILTER_H:.+]] = memref.dim %[[FILTER]], %[[C0]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILTER_W:.+]] = memref.dim %[[FILTER]], %[[C1]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[INPUT_N:.+]] = memref.dim %[[INPUT]], %[[C0]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[INPUT_H:.+]] = memref.dim %[[INPUT]], %[[C1]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[INPUT_W:.+]] = memref.dim %[[INPUT]], %[[C2]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[INPUT_C:.+]] = memref.dim %[[INPUT]], %[[C3]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILTER_IC:.+]] = memref.dim %[[FILTER]], %[[C2]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILTER_OC:.+]] = memref.dim %[[FILTER]], %[[C3]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILL_N:.+]] = memref.dim %[[FILL]], %[[C0]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILL_H:.+]] = memref.dim %[[FILL]], %[[C1]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILL_W:.+]] = memref.dim %[[FILL]], %[[C2]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILL_C:.+]] = memref.dim %[[FILL]], %[[C3]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILTER_H:.+]] = tensor.dim %[[FILTER]], %[[C0]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILTER_W:.+]] = tensor.dim %[[FILTER]], %[[C1]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[INPUT_N:.+]] = tensor.dim %[[INPUT]], %[[C0]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[INPUT_H:.+]] = tensor.dim %[[INPUT]], %[[C1]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[INPUT_W:.+]] = tensor.dim %[[INPUT]], %[[C2]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[INPUT_C:.+]] = tensor.dim %[[INPUT]], %[[C3]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILTER_IC:.+]] = tensor.dim %[[FILTER]], %[[C2]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILTER_OC:.+]] = tensor.dim %[[FILTER]], %[[C3]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILL_N:.+]] = tensor.dim %[[FILL]], %[[C0]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILL_H:.+]] = tensor.dim %[[FILL]], %[[C1]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILL_W:.+]] = tensor.dim %[[FILL]], %[[C2]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILL_C:.+]] = tensor.dim %[[FILL]], %[[C3]] : tensor<?x?x?x?xf32>

// CHECK: scf.for %[[IV0:.+]] = %{{.+}} to %[[ELEM_N]] step %{{.+}} iter_args(%{{.+}} = %[[FILL]])
// CHECK-NEXT: %[[SIZE_ELEM_N:.+]] = affine.min #[[BOUND8_MAP]](%[[IV0]])[%[[ELEM_N]]]
@@ -311,8 +311,8 @@ func @pad_generic_static(%small_input: tensor<58x1xf32>, %large_input: tensor<64
%c32 = constant 32 : index
%zero = constant 0.0 : f32

%d0 = memref.dim %large_input, %c0 : tensor<64x128xf32>
%d1 = memref.dim %large_input, %c1 : tensor<64x128xf32>
%d0 = tensor.dim %large_input, %c0 : tensor<64x128xf32>
%d1 = tensor.dim %large_input, %c1 : tensor<64x128xf32>

%pad = linalg.pad_tensor %small_input low[4, 60] high[2, 67] {
^bb0(%arg0: index, %arg1: index):

@@ -38,9 +38,9 @@ func @matmul_tensors(
// TLOOP-DAG: %[[C3:.*]] = constant 3 : index
// TLOOP-DAG: %[[C4:.*]] = constant 4 : index

// TLOOP: %[[ARG_0_X:.*]] = memref.dim %[[ARG_0]], %[[C0]] : [[TY]]
// TLOOP: %[[ARG_0_Y:.*]] = memref.dim %[[ARG_0]], %[[C1]] : [[TY]]
// TLOOP: %[[ARG_1_Y:.*]] = memref.dim %[[ARG_1]], %[[C1]] : [[TY]]
// TLOOP: %[[ARG_0_X:.*]] = tensor.dim %[[ARG_0]], %[[C0]] : [[TY]]
// TLOOP: %[[ARG_0_Y:.*]] = tensor.dim %[[ARG_0]], %[[C1]] : [[TY]]
// TLOOP: %[[ARG_1_Y:.*]] = tensor.dim %[[ARG_1]], %[[C1]] : [[TY]]

// TLOOP: %{{.*}} = linalg.tiled_loop (%[[I:.*]], %[[J:.*]], %[[K:.*]]) =
// TLOOP-SAME: (%[[C0]], %[[C0]], %[[C0]])
@@ -68,9 +68,9 @@ func @generic_op_tensors(
%c0 = constant 0 : index
%c1 = constant 1 : index
%c2 = constant 2 : index
%0 = memref.dim %arg0, %c0 : tensor<?x?x?xf32>
%1 = memref.dim %arg0, %c1 : tensor<?x?x?xf32>
%2 = memref.dim %arg0, %c2 : tensor<?x?x?xf32>
%0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
%2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
%3 = linalg.init_tensor [%0, %1, %2] : tensor<?x?x?xf32>
%4 = linalg.generic
{indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
@@ -119,9 +119,9 @@ func @generic_op_tensors(
// TLOOP-DAG: %[[C4:.*]] = constant 4 : index

// TLOOP: %[[INIT:.*]] = linalg.init_tensor
// TLOOP: %[[ARG_0_X:.*]] = memref.dim %[[ARG_0]], %[[C0]] : [[TY]]
// TLOOP: %[[ARG_0_Y:.*]] = memref.dim %[[ARG_0]], %[[C1]] : [[TY]]
// TLOOP: %[[ARG_0_Z:.*]] = memref.dim %[[ARG_0]], %[[C2]] : [[TY]]
// TLOOP: %[[ARG_0_X:.*]] = tensor.dim %[[ARG_0]], %[[C0]] : [[TY]]
// TLOOP: %[[ARG_0_Y:.*]] = tensor.dim %[[ARG_0]], %[[C1]] : [[TY]]
// TLOOP: %[[ARG_0_Z:.*]] = tensor.dim %[[ARG_0]], %[[C2]] : [[TY]]

// TLOOP: %{{.*}} = linalg.tiled_loop (%{{.*}}, %{{.*}}, %{{.*}}) =
// TLOOP-SAME: (%[[C0]], %[[C0]], %[[C0]])

@@ -580,12 +580,12 @@ func @pad_static_source(%arg0: tensor<2x5x2xf32>, %pad_value: f32) -> tensor<2x6
// CHECK: %[[V0:.*]] = addi %[[LOW]], %[[C2]] : index
// CHECK: %[[V1:.*]] = addi %[[V0]], %[[C3]] : index
// CHECK: %[[V2:.*]] = addi %[[HIGH]], %[[C5]] : index
// CHECK: %[[DIM3:.*]] = memref.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32>
// CHECK: %[[DIM3:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32>
// CHECK: %[[V4:.*]] = addi %[[DIM3]], %[[C3]] : index
// CHECK: %[[V5:.*]] = addi %[[V4]], %[[C2]] : index
// CHECK: %[[INIT:.*]] = linalg.init_tensor [6, %[[V1]], %[[V2]], %[[V5]]] : tensor<6x?x?x?xf32>
// CHECK: %[[FILL:.*]] = linalg.fill(%{{.*}}, %[[INIT]]) : f32, tensor<6x?x?x?xf32> -> tensor<6x?x?x?xf32>
// CHECK: %[[SRCDIM:.*]] = memref.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32>
// CHECK: %[[SRCDIM:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32>
// CHECK: %[[RESULT:.*]] = tensor.insert_slice %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32>
// CHECK: return %[[RESULT]]
func @pad_static_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index,

@@ -236,7 +236,7 @@ func @load_from_buffer_cast(%arg0: index, %arg1: index, %arg2: tensor<?x?xf32>)
// -----

// Test case: Basic folding of memref.dim(memref.tensor_load(m)) -> memref.dim(m).
// Test case: Basic folding of tensor.dim(memref.tensor_load(m)) -> memref.dim(m).
// CHECK-LABEL: func @dim_of_tensor_load(
// CHECK-SAME: %[[MEMREF:[0-9a-z]*]]: memref<?xf32>
// CHECK: %[[C0:.*]] = constant 0
@@ -245,24 +245,7 @@ func @load_from_buffer_cast(%arg0: index, %arg1: index, %arg2: tensor<?x?xf32>)
func @dim_of_tensor_load(%arg0: memref<?xf32>) -> index {
%c0 = constant 0 : index
%0 = memref.tensor_load %arg0 : memref<?xf32>
%1 = memref.dim %0, %c0 : tensor<?xf32>
return %1 : index
}

// -----

// Test case: Folding of memref.dim(tensor.generate %idx) -> %idx
// CHECK-LABEL: func @dim_of_tensor.generate(
// CHECK-SAME: %[[IDX0:[0-9a-z]+]]: index, %[[IDX1:[0-9a-z]+]]: index
// CHECK-NOT: memref.dim
// CHECK: return %[[IDX1]] : index
func @dim_of_tensor.generate(%arg0: index, %arg1: index) -> index {
%c3 = constant 3 : index
%0 = tensor.generate %arg0, %arg1 {
^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index):
tensor.yield %c3 : index
} : tensor<2x?x4x?x5xindex>
%1 = memref.dim %0, %c3 : tensor<2x?x4x?x5xindex>
%1 = tensor.dim %0, %c0 : tensor<?xf32>
return %1 : index
}

@@ -338,24 +321,6 @@ func @dim_of_memref_reshape_i32(%arg0: memref<*xf32>, %arg1: memref<?xi32>)

// -----

// Test case: Folding memref.dim(tensor.cast %0, %idx) -> memref.dim %0, %idx
// CHECK-LABEL: func @fold_dim_of_tensor.cast
// CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x?xf32>
// CHECK-DAG: %[[C1:.+]] = constant 1 : index
// CHECK-DAG: %[[C4:.+]] = constant 4 : index
// CHECK: %[[T0:.+]] = memref.dim %[[ARG0]], %[[C1]]
// CHECK-NEXT: return %[[C4]], %[[T0]]
func @fold_dim_of_tensor.cast(%arg0 : tensor<4x?xf32>) -> (index, index) {
%c0 = constant 0 : index
%c1 = constant 1 : index
%0 = tensor.cast %arg0 : tensor<4x?xf32> to tensor<?x?xf32>
%1 = memref.dim %0, %c0 : tensor<?x?xf32>
%2 = memref.dim %0, %c1 : tensor<?x?xf32>
return %1, %2: index, index
}

// -----

// CHECK-LABEL: func @tensor_cast_to_memref
// CHECK-SAME: %[[ARG0:.+]]: tensor<4x6x16x32xi8>
// CHECK: %[[M:.+]] = memref.buffer_cast %[[ARG0]] : memref<4x6x16x32xi8>

@@ -36,7 +36,7 @@
// CHECK: return %[[D]] : index
func @sparse_dim(%arg0: tensor<?xf64, #SparseVector>) -> index {
%c = constant 0 : index
%0 = memref.dim %arg0, %c : tensor<?xf64, #SparseVector>
%0 = tensor.dim %arg0, %c : tensor<?xf64, #SparseVector>
return %0 : index
}

@@ -1096,7 +1096,7 @@ func @sum_reduction_inv(%arga: tensor<16xf32, #SV>,
// CHECK: %[[VAL_13:.*]] = sparse_tensor.pointers %[[VAL_3]], %[[VAL_5]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK: %[[VAL_14:.*]] = sparse_tensor.indices %[[VAL_3]], %[[VAL_5]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_3]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf64>
// CHECK: %[[VAL_16:.*]] = memref.dim %[[VAL_4]], %[[VAL_5]] : tensor<?xf64>
// CHECK: %[[VAL_16:.*]] = tensor.dim %[[VAL_4]], %[[VAL_5]] : tensor<?xf64>
// CHECK: %[[VAL_17:.*]] = memref.buffer_cast %[[VAL_4]] : memref<?xf64>
// CHECK: %[[VAL_18:.*]] = memref.alloc(%[[VAL_16]]) : memref<?xf64>
// CHECK: linalg.copy(%[[VAL_17]], %[[VAL_18]]) : memref<?xf64>, memref<?xf64>

@@ -977,8 +977,8 @@ func @sum_reduction(%arga: tensor<10x20xf32, #Tds>, %argx: tensor<f32>) -> tenso
// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<?x?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<?x?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf64>
// CHECK: %[[VAL_8:.*]] = memref.dim %[[VAL_1]], %[[VAL_3]] : tensor<?x?xf64>
// CHECK: %[[VAL_9:.*]] = memref.dim %[[VAL_1]], %[[VAL_4]] : tensor<?x?xf64>
// CHECK: %[[VAL_8:.*]] = tensor.dim %[[VAL_1]], %[[VAL_3]] : tensor<?x?xf64>
// CHECK: %[[VAL_9:.*]] = tensor.dim %[[VAL_1]], %[[VAL_4]] : tensor<?x?xf64>
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<?x?xf64>
// CHECK: %[[VAL_11:.*]] = memref.alloc(%[[VAL_8]], %[[VAL_9]]) : memref<?x?xf64>
// CHECK: linalg.copy(%[[VAL_10]], %[[VAL_11]]) : memref<?x?xf64>, memref<?x?xf64>
@@ -1032,10 +1032,10 @@ func @scale(%arga: tensor<?x?xf64, #Tds>, %argx: tensor<?x?xf64>) -> tensor<?x?x
// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_5]] : tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_1]] : memref<?x?xf32>
// CHECK: %[[VAL_12:.*]] = memref.dim %[[VAL_2]], %[[VAL_4]] : tensor<?x?xf32>
// CHECK: %[[VAL_12:.*]] = tensor.dim %[[VAL_2]], %[[VAL_4]] : tensor<?x?xf32>
// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_2]] : memref<?x?xf32>
// CHECK: %[[VAL_14:.*]] = memref.dim %[[VAL_3]], %[[VAL_4]] : tensor<?x?xf32>
// CHECK: %[[VAL_15:.*]] = memref.dim %[[VAL_3]], %[[VAL_5]] : tensor<?x?xf32>
// CHECK: %[[VAL_14:.*]] = tensor.dim %[[VAL_3]], %[[VAL_4]] : tensor<?x?xf32>
// CHECK: %[[VAL_15:.*]] = tensor.dim %[[VAL_3]], %[[VAL_5]] : tensor<?x?xf32>
// CHECK: %[[VAL_16:.*]] = memref.buffer_cast %[[VAL_3]] : memref<?x?xf32>
// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_14]], %[[VAL_15]]) : memref<?x?xf32>
// CHECK: linalg.copy(%[[VAL_16]], %[[VAL_17]]) : memref<?x?xf32>, memref<?x?xf32>
@@ -1115,7 +1115,7 @@ func @sampled_dense_dense(%args: tensor<?x?xf32, #Tss>,
// CHECK: %[[VAL_19:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK: %[[VAL_20:.*]] = memref.buffer_cast %[[VAL_3]] : memref<?xf32>
// CHECK: %[[VAL_21:.*]] = memref.buffer_cast %[[VAL_4]] : memref<f32>
// CHECK: %[[VAL_22:.*]] = memref.dim %[[VAL_5]], %[[VAL_6]] : tensor<?xf32>
// CHECK: %[[VAL_22:.*]] = tensor.dim %[[VAL_5]], %[[VAL_6]] : tensor<?xf32>
// CHECK: %[[VAL_23:.*]] = memref.buffer_cast %[[VAL_5]] : memref<?xf32>
// CHECK: %[[VAL_24:.*]] = memref.alloc(%[[VAL_22]]) : memref<?xf32>
// CHECK: linalg.copy(%[[VAL_23]], %[[VAL_24]]) : memref<?xf32>, memref<?xf32>

@@ -1135,11 +1135,11 @@ func @mul_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_4]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_4]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK: %[[VAL_10:.*]] = memref.dim %[[VAL_2]], %[[VAL_5]] : tensor<?x?xf32>
// CHECK: %[[VAL_10:.*]] = tensor.dim %[[VAL_2]], %[[VAL_5]] : tensor<?x?xf32>
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<?x?xf32>
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_3]] : memref<?x?xf32>
// CHECK: %[[VAL_13:.*]] = memref.dim %[[VAL_0]], %[[VAL_5]] : tensor<?x?xf32>
// CHECK: %[[VAL_14:.*]] = memref.dim %[[VAL_0]], %[[VAL_6]] : tensor<?x?xf32>
// CHECK: %[[VAL_13:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor<?x?xf32>
// CHECK: %[[VAL_14:.*]] = tensor.dim %[[VAL_0]], %[[VAL_6]] : tensor<?x?xf32>
// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_0]] : memref<?x?xf32>
// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]], %[[VAL_14]]) : memref<?x?xf32>
// CHECK: linalg.copy(%[[VAL_15]], %[[VAL_16]]) : memref<?x?xf32>, memref<?x?xf32>
@@ -1256,10 +1256,10 @@ func @sum_reduction(%arga: tensor<10x20x30xf32, #Tsss>, %argx: tensor<f32>) -> t
// CHECK: %[[VAL_3:.*]] = constant 2 : index
// CHECK: %[[VAL_4:.*]] = constant 0 : index
// CHECK: %[[VAL_5:.*]] = constant 1 : index
// CHECK: %[[VAL_6:.*]] = memref.dim %[[VAL_0]], %[[VAL_5]] : tensor<?x?x?xf32>
// CHECK: %[[VAL_7:.*]] = memref.dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?x?xf32>
// CHECK: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor<?x?x?xf32>
// CHECK: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?x?xf32>
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_0]] : memref<?x?x?xf32>
// CHECK: %[[VAL_9:.*]] = memref.dim %[[VAL_1]], %[[VAL_4]] : tensor<?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>>
// CHECK: %[[VAL_9:.*]] = tensor.dim %[[VAL_1]], %[[VAL_4]] : tensor<?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>>
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<f32>
// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref<f32>

@@ -7,7 +7,7 @@
// CHECK: %[[EXTENT:.*]] = memref.dim %[[MEMREF]], %[[INDEX]] : memref<f32>
// CHECK: return %[[EXTENT]] : index
func @dim(%arg0: tensor<f32>, %arg1: index) -> index {
%0 = memref.dim %arg0, %arg1 : tensor<f32>
%0 = tensor.dim %arg0, %arg1 : tensor<f32>
return %0 : index
}

@@ -67,7 +67,7 @@ func @tensor.from_elements(%arg0: index, %arg1: index) -> tensor<2xindex> {
// CHECK: %[[C0:.*]] = constant 0 : index
// CHECK: %[[C1:.*]] = constant 1 : index
// CHECK: scf.parallel (%[[I:.*]]) = (%[[C0]]) to (%[[DYNAMIC_EXTENT]]) step (%[[C1]]) {
// CHECK: %[[ELEM:.*]] = memref.dim %[[ARG]], %[[I]] : tensor<*xf32>
// CHECK: %[[ELEM:.*]] = tensor.dim %[[ARG]], %[[I]] : tensor<*xf32>
// CHECK: store %[[ELEM]], %[[MEMREF]][%[[I]]] : memref<?xindex>
// CHECK: scf.yield
// CHECK: }
@@ -77,7 +77,7 @@ func @tensor.from_elements(%arg0: index, %arg1: index) -> tensor<2xindex> {
func @tensor.generate(%arg: tensor<*xf32>, %dynamic_extent: index) -> tensor<?xindex> {
%result = tensor.generate %dynamic_extent {
^bb0(%i : index):
%elem = memref.dim %arg, %i : tensor<*xf32>
%elem = tensor.dim %arg, %i : tensor<*xf32>
tensor.yield %elem : index
} : tensor<?xindex>
return %result : tensor<?xindex>

@@ -184,10 +184,10 @@ func @extract_oob_from_tensor.from_elements(%element : index) -> index {
// CHECK-SAME: %[[IDX:.*]]: index, %[[TENSOR:.*]]: tensor<*xf32>
func @extract_from_tensor.generate(%idx: index, %tensor: tensor<*xf32>) -> index {
%size = rank %tensor : tensor<*xf32>
// CHECK-NEXT: %[[RES:.*]] = memref.dim %[[TENSOR]], %[[IDX]]
// CHECK-NEXT: %[[RES:.*]] = tensor.dim %[[TENSOR]], %[[IDX]]
%0 = tensor.generate %size {
^bb0(%arg0: index):
%1 = memref.dim %tensor, %arg0 : tensor<*xf32>
%1 = tensor.dim %tensor, %arg0 : tensor<*xf32>
tensor.yield %1 : index
} : tensor<?xindex>
%1 = tensor.extract %0[%idx] : tensor<?xindex>
@@ -201,13 +201,13 @@ func @extract_from_tensor.generate(%idx: index, %tensor: tensor<*xf32>) -> index
// CHECK-SAME: %[[IDX0:.*]]: index, %[[IDX1:.*]]: index, %[[TENSOR:.*]]: tensor<*xf32>
func @extract_from_tensor.generate_2d(%idx0: index, %idx1: index, %tensor: tensor<*xf32>) -> index {
%size = rank %tensor : tensor<*xf32>
// CHECK-NEXT: %[[DIM0:.*]] = memref.dim %[[TENSOR]], %[[IDX0]]
// CHECK-NEXT: %[[DIM1:.*]] = memref.dim %[[TENSOR]], %[[IDX1]]
// CHECK-NEXT: %[[DIM0:.*]] = tensor.dim %[[TENSOR]], %[[IDX0]]
// CHECK-NEXT: %[[DIM1:.*]] = tensor.dim %[[TENSOR]], %[[IDX1]]
// CHECK-NEXT: %[[RES:.*]] = addi %[[DIM0]], %[[DIM1]]
%0 = tensor.generate %size, %size {
^bb0(%arg0: index, %arg1: index):
%1 = memref.dim %tensor, %arg0 : tensor<*xf32>
%2 = memref.dim %tensor, %arg1 : tensor<*xf32>
%1 = tensor.dim %tensor, %arg0 : tensor<*xf32>
%2 = tensor.dim %tensor, %arg1 : tensor<*xf32>
%3 = addi %1, %2 : index
tensor.yield %3 : index
} : tensor<?x?xindex>
@@ -225,7 +225,7 @@ func @extract_from_tensor.generate_sideeffects(%idx: index, %tensor: tensor<*xf3
// CHECK: %[[DTENSOR:.*]] = tensor.generate
%0 = tensor.generate %size {
^bb0(%arg0: index):
%1 = memref.dim %tensor, %arg0 : tensor<*xf32>
%1 = tensor.dim %tensor, %arg0 : tensor<*xf32>
memref.store %1, %mem[%arg0] : memref<?xindex>
tensor.yield %1 : index
} : tensor<?xindex>
@@ -443,7 +443,7 @@ func @insert_slice_propagate_dest_cast(%arg0 : tensor<2x?xi32>, %arg1 : tensor<i
%c1 = constant 1 : index
%c2 = constant 2 : index
%c8 = constant 8 : index
%0 = memref.dim %arg0, %c1 : tensor<2x?xi32>
%0 = tensor.dim %arg0, %c1 : tensor<2x?xi32>
%1 = tensor.extract %arg1[] : tensor<i32>
%2 = tensor.generate %arg2, %c8 {
^bb0(%arg4: index, %arg5: index):
@@ -482,3 +482,38 @@ func @insert_slice_output_dest_canonicalize(%arg0 : tensor<2x3xi32>, %arg1 : ten
// CHECK: %[[GENERATE:.+]] = tensor.generate
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[ARG0]] into %[[GENERATE]]
// CHECK: return %[[RESULT]]

// -----

// Test case: Folding of tensor.dim(tensor.generate %idx) -> %idx
// CHECK-LABEL: func @dim_of_tensor.generate(
// CHECK-SAME: %[[IDX0:[0-9a-z]+]]: index, %[[IDX1:[0-9a-z]+]]: index
// CHECK-NOT: tensor.dim
// CHECK: return %[[IDX1]] : index
func @dim_of_tensor.generate(%arg0: index, %arg1: index) -> index {
%c3 = constant 3 : index
%0 = tensor.generate %arg0, %arg1 {
^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index):
tensor.yield %c3 : index
} : tensor<2x?x4x?x5xindex>
%1 = tensor.dim %0, %c3 : tensor<2x?x4x?x5xindex>
return %1 : index
}

// -----

// Test case: Folding tensor.dim(tensor.cast %0, %idx) -> tensor.dim %0, %idx
// CHECK-LABEL: func @fold_dim_of_tensor.cast
// CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x?xf32>
// CHECK-DAG: %[[C1:.+]] = constant 1 : index
// CHECK-DAG: %[[C4:.+]] = constant 4 : index
// CHECK: %[[T0:.+]] = tensor.dim %[[ARG0]], %[[C1]]
// CHECK-NEXT: return %[[C4]], %[[T0]]
func @fold_dim_of_tensor.cast(%arg0 : tensor<4x?xf32>) -> (index, index) {
%c0 = constant 0 : index
%c1 = constant 1 : index
%0 = tensor.cast %arg0 : tensor<4x?xf32> to tensor<?x?xf32>
%1 = tensor.dim %0, %c0 : tensor<?x?xf32>
%2 = tensor.dim %0, %c1 : tensor<?x?xf32>
return %1, %2: index, index
}

@@ -35,9 +35,9 @@ func @func_with_ops(f32) {
%t = "getTensor"() : () -> tensor<4x4x?xf32>

// CHECK: %[[C2:.*]] = constant 2 : index
// CHECK-NEXT: %{{.*}} = memref.dim %[[T]], %[[C2]] : tensor<4x4x?xf32>
// CHECK-NEXT: %{{.*}} = tensor.dim %[[T]], %[[C2]] : tensor<4x4x?xf32>
%c2 = constant 2 : index
%t2 = "memref.dim"(%t, %c2) : (tensor<4x4x?xf32>, index) -> index
%t2 = "tensor.dim"(%t, %c2) : (tensor<4x4x?xf32>, index) -> index

// CHECK: %{{.*}} = addf %[[ARG]], %[[ARG]] : f32
%x = "std.addf"(%a, %a) : (f32,f32) -> (f32)
@@ -50,9 +50,9 @@ func @func_with_ops(f32) {
func @standard_instrs(tensor<4x4x?xf32>, f32, i32, index, i64, f16) {
^bb42(%t: tensor<4x4x?xf32>, %f: f32, %i: i32, %idx : index, %j: i64, %half: f16):
// CHECK: %[[C2:.*]] = constant 2 : index
// CHECK: %[[A2:.*]] = memref.dim %arg0, %[[C2]] : tensor<4x4x?xf32>
// CHECK: %[[A2:.*]] = tensor.dim %arg0, %[[C2]] : tensor<4x4x?xf32>
%c2 = constant 2 : index
%a2 = memref.dim %t, %c2 : tensor<4x4x?xf32>
%a2 = tensor.dim %t, %c2 : tensor<4x4x?xf32>

// CHECK: %[[F2:.*]] = addf %arg1, %arg1 : f32
%f2 = "std.addf"(%f, %f) : (f32,f32) -> f32
@@ -757,9 +757,9 @@ func @memref_subview(%arg0 : index, %arg1 : index, %arg2 : index) {
// CHECK-SAME: %[[ARG:.*]]: tensor<4x4x?xf32>
func @test_dimop(%arg0: tensor<4x4x?xf32>) {
// CHECK: %[[C2:.*]] = constant 2 : index
// CHECK: %{{.*}} = memref.dim %[[ARG]], %[[C2]] : tensor<4x4x?xf32>
// CHECK: %{{.*}} = tensor.dim %[[ARG]], %[[C2]] : tensor<4x4x?xf32>
%c2 = constant 2 : index
%0 = memref.dim %arg0, %c2 : tensor<4x4x?xf32>
%0 = tensor.dim %arg0, %c2 : tensor<4x4x?xf32>
// use dim as an index to ensure type correctness
%1 = affine.apply affine_map<(d0) -> (d0)>(%0)
return

@@ -2,7 +2,7 @@

func @dim(%arg : tensor<1x?xf32>) {
%c2 = constant 2 : index
memref.dim %arg, %c2 : tensor<1x?xf32> // expected-error {{'memref.dim' op index is out of range}}
tensor.dim %arg, %c2 : tensor<1x?xf32> // expected-error {{'tensor.dim' op index is out of range}}
return
}

@@ -7,11 +7,11 @@ func @result_shape(%arg0 : tensor<2x3x?xf32>, %arg1 : tensor<?x5xf32>)
%c2 = constant 2 : index
%0:2 = "test.op_with_result_shape_interface"(%arg0, %arg1)
: (tensor<2x3x?xf32>, tensor<?x5xf32>) -> (tensor<?x5xf32>, tensor<2x3x?xf32>)
%1 = memref.dim %0#0, %c0 : tensor<?x5xf32>
%2 = memref.dim %0#0, %c1 : tensor<?x5xf32>
%3 = memref.dim %0#1, %c0 : tensor<2x3x?xf32>
%4 = memref.dim %0#1, %c1 : tensor<2x3x?xf32>
%5 = memref.dim %0#1, %c2 : tensor<2x3x?xf32>
%1 = tensor.dim %0#0, %c0 : tensor<?x5xf32>
%2 = tensor.dim %0#0, %c1 : tensor<?x5xf32>
%3 = tensor.dim %0#1, %c0 : tensor<2x3x?xf32>
%4 = tensor.dim %0#1, %c1 : tensor<2x3x?xf32>
%5 = tensor.dim %0#1, %c2 : tensor<2x3x?xf32>
return %1, %2, %3, %4, %5 : index, index, index, index, index
}
// CHECK-LABEL: func @result_shape(
@@ -21,10 +21,10 @@ func @result_shape(%arg0 : tensor<2x3x?xf32>, %arg1 : tensor<?x5xf32>)
// CHECK-DAG: %[[C2:.+]] = constant 2 : index
// CHECK-DAG: %[[C3:.+]] = constant 3 : index
// CHECK-DAG: %[[C5:.+]] = constant 5 : index
// CHECK-DAG: %[[D0:.+]] = memref.dim %[[ARG_1]], %[[C0]]
// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG_1]], %[[C0]]
// CHECK-DAG: %[[S0:.+]] = tensor.from_elements %[[D0]], %[[C5]]
// CHECK-DAG: %[[D0_OUT:.+]] = tensor.extract %[[S0]][%[[C0]]]
// CHECK-DAG: %[[D1:.+]] = memref.dim %[[ARG_0]], %[[C2]]
// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG_0]], %[[C2]]
// CHECK-DAG: %[[S1:.+]] = tensor.from_elements %[[C2]], %[[C3]], %[[D1]]
// CHECK-DAG: %[[D1_OUT:.+]] = tensor.extract %[[S1]][%[[C2]]]
// CHECK: return %[[D0_OUT]], %[[C5]], %[[C2]], %[[C3]], %[[D1_OUT]]
@@ -38,11 +38,11 @@ func @result_shape_per_dim(%arg0 : tensor<2x3x?xf32>, %arg1 : tensor<?x5xf32>)
%c2 = constant 2 : index
%0:2 = "test.op_with_result_shape_per_dim_interface"(%arg0, %arg1)
: (tensor<2x3x?xf32>, tensor<?x5xf32>) -> (tensor<?x5xf32>, tensor<2x3x?xf32>)
%1 = memref.dim %0#0, %c0 : tensor<?x5xf32>
%2 = memref.dim %0#0, %c1 : tensor<?x5xf32>
%3 = memref.dim %0#1, %c0 : tensor<2x3x?xf32>
%4 = memref.dim %0#1, %c1 : tensor<2x3x?xf32>
%5 = memref.dim %0#1, %c2 : tensor<2x3x?xf32>
%1 = tensor.dim %0#0, %c0 : tensor<?x5xf32>
%2 = tensor.dim %0#0, %c1 : tensor<?x5xf32>
%3 = tensor.dim %0#1, %c0 : tensor<2x3x?xf32>
%4 = tensor.dim %0#1, %c1 : tensor<2x3x?xf32>
%5 = tensor.dim %0#1, %c2 : tensor<2x3x?xf32>
return %1, %2, %3, %4, %5 : index, index, index, index, index
}
// CHECK-LABEL: func @result_shape_per_dim(
@@ -52,8 +52,8 @@ func @result_shape_per_dim(%arg0 : tensor<2x3x?xf32>, %arg1 : tensor<?x5xf32>)
// CHECK-DAG: %[[C2:.+]] = constant 2 : index
// CHECK-DAG: %[[C3:.+]] = constant 3 : index
// CHECK-DAG: %[[C5:.+]] = constant 5 : index
// CHECK-DAG: %[[D0:.+]] = memref.dim %[[ARG_1]], %[[C0]]
// CHECK-DAG: %[[D1:.+]] = memref.dim %[[ARG_0]], %[[C2]]
// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG_1]], %[[C0]]
// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG_0]], %[[C2]]
// CHECK: return %[[D0]], %[[C5]], %[[C2]], %[[C3]], %[[D1]]

// -----
@@ -65,11 +65,11 @@ func @result_shape_and_per_dim(%arg0 : tensor<2x3x?xf32>, %arg1 : tensor<?x5xf32
%c2 = constant 2 : index
%0:2 = "test.op_with_result_shape_and_per_dim_interface"(%arg0, %arg1)
: (tensor<2x3x?xf32>, tensor<?x5xf32>) -> (tensor<?x5xf32>, tensor<2x3x?xf32>)
%1 = memref.dim %0#0, %c0 : tensor<?x5xf32>
%2 = memref.dim %0#0, %c1 : tensor<?x5xf32>
%3 = memref.dim %0#1, %c0 : tensor<2x3x?xf32>
%4 = memref.dim %0#1, %c1 : tensor<2x3x?xf32>
%5 = memref.dim %0#1, %c2 : tensor<2x3x?xf32>
%1 = tensor.dim %0#0, %c0 : tensor<?x5xf32>
%2 = tensor.dim %0#0, %c1 : tensor<?x5xf32>
%3 = tensor.dim %0#1, %c0 : tensor<2x3x?xf32>
%4 = tensor.dim %0#1, %c1 : tensor<2x3x?xf32>
%5 = tensor.dim %0#1, %c2 : tensor<2x3x?xf32>
return %1, %2, %3, %4, %5 : index, index, index, index, index
}
// CHECK-LABEL: func @result_shape_and_per_dim(
@@ -79,10 +79,10 @@ func @result_shape_and_per_dim(%arg0 : tensor<2x3x?xf32>, %arg1 : tensor<?x5xf32
// CHECK-DAG: %[[C2:.+]] = constant 2 : index
// CHECK-DAG: %[[C3:.+]] = constant 3 : index
// CHECK-DAG: %[[C5:.+]] = constant 5 : index
// CHECK-DAG: %[[D0:.+]] = memref.dim %[[ARG_1]], %[[C0]]
// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG_1]], %[[C0]]
// CHECK-DAG: %[[S0:.+]] = tensor.from_elements %[[D0]], %[[C5]]
// CHECK-DAG: %[[D0_OUT:.+]] = tensor.extract %[[S0]][%[[C0]]]
// CHECK-DAG: %[[D1:.+]] = memref.dim %[[ARG_0]], %[[C2]]
// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG_0]], %[[C2]]
// CHECK-DAG: %[[S1:.+]] = tensor.from_elements %[[C2]], %[[C3]], %[[D1]]
// CHECK-DAG: %[[D1_OUT:.+]] = tensor.extract %[[S1]][%[[C2]]]
// CHECK: return %[[D0_OUT]], %[[C5]], %[[C2]], %[[C3]], %[[D1_OUT]]

@@ -29,7 +29,7 @@ func @dim(%arg0: tensor<8x4xf32>) -> index {

// CHECK: %c4 = constant 4 : index
%c1 = constant 1 : index
%0 = memref.dim %arg0, %c1 : tensor<8x4xf32>
%0 = tensor.dim %arg0, %c1 : tensor<8x4xf32>

// CHECK-NEXT: return %c4
return %0 : index
@@ -53,7 +53,7 @@ func @test_commutative(%arg0: i32) -> (i32, i32) {
// CHECK-LABEL: func @trivial_dce
func @trivial_dce(%arg0: tensor<8x4xf32>) {
%c1 = constant 1 : index
%0 = memref.dim %arg0, %c1 : tensor<8x4xf32>
%0 = tensor.dim %arg0, %c1 : tensor<8x4xf32>
// CHECK-NEXT: return
return
}

@@ -548,7 +548,7 @@ func @dim(%x : tensor<8x4xf32>) -> index {

// CHECK:[[C4:%.+]] = constant 4 : index
%c1 = constant 1 : index
%0 = memref.dim %x, %c1 : tensor<8x4xf32>
%0 = tensor.dim %x, %c1 : tensor<8x4xf32>

// CHECK-NEXT: return [[C4]]
return %0 : index

@@ -320,18 +320,14 @@ func @live_out_use(%arg0: memref<512 x 32 x f32>) -> f32 {
// -----

// CHECK-LABEL: func @dynamic_shape_dma_buffer
func @dynamic_shape_dma_buffer(%arg0: memref<512 x 32 x f32>) {
%c32 = constant 32 : index
func @dynamic_shape_dma_buffer(%arg0: memref<512 x 32 x f32>, %Av: memref<? x ? x f32, 2>) {
%num_elt = constant 512 : index
%zero = constant 0 : index

%Av = memref.alloc(%c32, %c32) : memref<? x ? x f32, 2>
%tag = memref.alloc() : memref<1 x i32>

// Double buffering for dynamic shaped buffer.
// CHECK: memref.alloc(%{{.*}}, %{{.*}}) : memref<?x?xf32, 2>
// CHECK-NEXT: %[[C0:.*]] = constant 0 : index
// CHECK-NEXT: memref.dim %{{.*}}, %[[C0]] : memref<?x?xf32, 2>
// Note: Cannot capture C0 because there are multiple C0 constants in the IR.
// CHECK: memref.dim %{{.*}}, %{{.*}} : memref<?x?xf32, 2>
// CHECK-NEXT: %[[C1:.*]] = constant 1 : index
// CHECK-NEXT: memref.dim %{{.*}}, %[[C1]] : memref<?x?xf32, 2>
// CHECK-NEXT: memref.alloc(%{{.*}}, %{{.*}}) : memref<2x?x?xf32, 2>
@@ -342,7 +338,6 @@ func @dynamic_shape_dma_buffer(%arg0: memref<512 x 32 x f32>) {
memref<? x ? x f32, 2>, memref<1 x i32>
affine.dma_wait %tag[%zero], %num_elt : memref<1 x i32>
}
memref.dealloc %Av : memref<? x ? x f32, 2>
return
// CHECK-NEXT: affine.for %{{.*}} = 1 to 16 {
// CHECK: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}} mod 2, 0, 0], %{{.*}}[%{{.*}} mod 2, 0], %{{.*}}

@@ -11,7 +11,6 @@
#include "TestInterfaces.h"
#include "TestTypes.h"
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/BuiltinOps.h"
@@ -801,7 +800,7 @@ LogicalResult OpWithShapedTypeInferTypeInterfaceOp::reifyReturnTypeShapes(
    OpBuilder &builder, ValueRange operands,
    llvm::SmallVectorImpl<Value> &shapes) {
  shapes = SmallVector<Value, 1>{
      builder.createOrFold<memref::DimOp>(getLoc(), operands.front(), 0)};
      builder.createOrFold<tensor::DimOp>(getLoc(), operands.front(), 0)};
  return success();
}

@@ -815,7 +814,7 @@ LogicalResult OpWithResultShapeInterfaceOp::reifyReturnTypeShapes(
        llvm::seq<int64_t>(
            0, operand.getType().cast<RankedTensorType>().getRank()),
        [&](int64_t dim) -> Value {
          return builder.createOrFold<memref::DimOp>(loc, operand, dim);
          return builder.createOrFold<tensor::DimOp>(loc, operand, dim);
        }));
    shapes.push_back(builder.create<tensor::FromElementsOp>(
        getLoc(), builder.getIndexType(), currShape));
@@ -834,7 +833,7 @@ OpWithResultShapePerDimInterfaceOp ::reifyReturnTypeShapesPerResultDim(
        llvm::seq<int64_t>(
            0, operand.getType().cast<RankedTensorType>().getRank()),
        [&](int64_t dim) -> Value {
          return builder.createOrFold<memref::DimOp>(loc, operand, dim);
          return builder.createOrFold<tensor::DimOp>(loc, operand, dim);
        }));
    shapes.emplace_back(std::move(currShape));
  }
@@ -851,7 +850,7 @@ LogicalResult OpWithResultShapeAndPerDimInterfaceOp::reifyReturnTypeShapes(
        llvm::seq<int64_t>(
            0, operand.getType().cast<RankedTensorType>().getRank()),
        [&](int64_t dim) -> Value {
          return builder.createOrFold<memref::DimOp>(loc, operand, dim);
          return builder.createOrFold<tensor::DimOp>(loc, operand, dim);
        }));
    shapes.push_back(builder.create<tensor::FromElementsOp>(
        getLoc(), builder.getIndexType(), currShape));
@@ -870,7 +869,7 @@ OpWithResultShapeAndPerDimInterfaceOp ::reifyReturnTypeShapesPerResultDim(
        llvm::seq<int64_t>(
            0, operand.getType().cast<RankedTensorType>().getRank()),
        [&](int64_t dim) -> Value {
          return builder.createOrFold<memref::DimOp>(loc, operand, dim);
          return builder.createOrFold<tensor::DimOp>(loc, operand, dim);
        }));
    shapes.emplace_back(std::move(currShape));
  }

@@ -7,9 +7,9 @@
//===----------------------------------------------------------------------===//

#include "TestDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/StandardOps/Transforms/FuncConversions.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/Matchers.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
@@ -159,7 +159,7 @@ static void reifyReturnShape(Operation *op) {
struct TestReturnTypeDriver
    : public PassWrapper<TestReturnTypeDriver, FunctionPass> {
  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<memref::MemRefDialect>();
    registry.insert<tensor::TensorDialect>();
  }
  StringRef getArgument() const final { return "test-return-type"; }
  StringRef getDescription() const final { return "Run return type functions"; }