clang-p2996/mlir/lib/Dialect/MemRef/Transforms/ComposeSubView.cpp
lonely eagle 2ecf608829 [mlir]Fix compose subview (#80551)
I found a bug in `test-compose-subview`; the example below demonstrates it.
```
#map = affine_map<() -> ()>
module {
  func.func private @fun(%arg0: memref<10x10xf32>, %arg1: memref<5x5xf32>) -> memref<5x5xf32> {
    %c0 = arith.constant 0 : index
    %c5 = arith.constant 5 : index
    %c1 = arith.constant 1 : index
    %subview = memref.subview %arg0[0, 0] [5, 5] [1, 1] : memref<10x10xf32> to memref<5x5xf32, strided<[10, 1]>>
    %alloc = memref.alloc() : memref<5x5xf32>
    scf.for %arg2 = %c0 to %c5 step %c1 {
      scf.for %arg3 = %c0 to %c5 step %c1 {
        %subview_0 = memref.subview %subview[%arg2, %arg3] [1, 1] [1, 1] : memref<5x5xf32, strided<[10, 1]>> to memref<f32, strided<[], offset: ?>>
        %subview_1 = memref.subview %arg1[%arg2, %arg3] [1, 1] [1, 1] : memref<5x5xf32> to memref<f32, strided<[], offset: ?>>
        %alloc_2 = memref.alloc() : memref<f32>
        linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%subview_0, %subview_1 : memref<f32, strided<[], offset: ?>>, memref<f32, strided<[], offset: ?>>) outs(%alloc_2 : memref<f32>) {
        ^bb0(%in: f32, %in_4: f32, %out: f32):
          %0 = arith.addf %in, %in_4 : f32
          linalg.yield %0 : f32
        }
        %subview_3 = memref.subview %alloc[%arg2, %arg3] [1, 1] [1, 1] : memref<5x5xf32> to memref<f32, strided<[], offset: ?>>
        memref.copy %alloc_2, %subview_3 : memref<f32> to memref<f32, strided<[], offset: ?>>
      }
    }
    return %alloc : memref<5x5xf32>
  }
  func.func @test(%arg0: memref<10x10xf32>, %arg1: memref<5x5xf32>) -> memref<5x5xf32> {
    %0 = call @fun(%arg0, %arg1) : (memref<10x10xf32>, memref<5x5xf32>) -> memref<5x5xf32>
    return %0 : memref<5x5xf32>
  }
}
```
When I run `mlir-opt test.mlir -test-compose-subview`, I get:
```
test.mlir:14:9: error: 'linalg.generic' op expected operand rank (2) to match the result rank of indexing_map #0 (0)
        linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%subview_0, %subview_1 : memref<f32, strided<[], offset: ?>>, memref<f32, strided<[], offset: ?>>) outs(%alloc_2 : memref<f32>) {
        ^
test.mlir:14:9: note: see current operation:
"linalg.generic"(%4, %5, %6) <{indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = [], operandSegmentSizes = array<i32: 2, 1>}> ({
^bb0(%arg4: f32, %arg5: f32, %arg6: f32):
  %8 = "arith.addf"(%arg4, %arg5) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
  "linalg.yield"(%8) : (f32) -> ()
}) : (memref<1x1xf32, strided<[10, 1], offset: ?>>, memref<f32, strided<[], offset: ?>>, memref<f32>) -> ()
```
This PR fixes that. I have also extended the pattern to handle cases where
the stride is greater than 1, as in the following example.
```
func.func private @Unknown0(%arg0: memref<10x10xf32>, %arg1: memref<5x5xf32>) -> memref<5x5xf32> {
  %c0 = arith.constant 0 : index
  %c5 = arith.constant 5 : index
  %c1 = arith.constant 1 : index
  %subview = memref.subview %arg0[0, 0] [5, 5] [2, 2] : memref<10x10xf32> to memref<5x5xf32, strided<[20, 2]>>
  %alloc = memref.alloc() : memref<5x5xf32>
  scf.for %arg2 = %c0 to %c5 step %c1 {
    scf.for %arg3 = %c0 to %c5 step %c1 {
      %subview_0 = memref.subview %subview[%arg2, %arg3] [1, 1] [1, 1] : memref<5x5xf32, strided<[20, 2]>> to memref<f32, strided<[], offset: ?>>
      %subview_1 = memref.subview %arg1[%arg2, %arg3] [1, 1] [1, 1] : memref<5x5xf32> to memref<f32, strided<[], offset: ?>>
      %alloc_2 = memref.alloc() : memref<f32>
      linalg.generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []} ins(%subview_0, %subview_1 : memref<f32, strided<[], offset: ?>>, memref<f32, strided<[], offset: ?>>) outs(%alloc_2 : memref<f32>) {
      ^bb0(%in: f32, %in_4: f32, %out: f32):
        %0 = arith.addf %in, %in_4 : f32
        linalg.yield %0 : f32
      }
      %subview_3 = memref.subview %alloc[%arg2, %arg3] [1, 1] [1, 1] : memref<5x5xf32> to memref<f32, strided<[], offset: ?>>
      memref.copy %alloc_2, %subview_3 : memref<f32> to memref<f32, strided<[], offset: ?>>
    }
  }
  return %alloc : memref<5x5xf32>
}
$ mlir-opt test.mlir -test-compose-subview
#map = affine_map<()[s0] -> (s0 * 2)>
#map1 = affine_map<() -> ()>
module {
  func.func private @Unknown0(%arg0: memref<10x10xf32>, %arg1: memref<5x5xf32>) -> memref<5x5xf32>  {
    %c0 = arith.constant 0 : index
    %c5 = arith.constant 5 : index
    %c1 = arith.constant 1 : index
    %alloc = memref.alloc() : memref<5x5xf32>
    scf.for %arg2 = %c0 to %c5 step %c1 {
      scf.for %arg3 = %c0 to %c5 step %c1 {
        %0 = affine.apply #map()[%arg2]
        %1 = affine.apply #map()[%arg3]
        %subview = memref.subview %arg0[%0, %1] [1, 1] [2, 2] : memref<10x10xf32> to memref<f32, strided<[], offset: ?>>
        %subview_0 = memref.subview %arg1[%arg2, %arg3] [1, 1] [1, 1] : memref<5x5xf32> to memref<f32, strided<[], offset: ?>>
        %alloc_1 = memref.alloc() : memref<f32>
        linalg.generic {indexing_maps = [#map1, #map1, #map1], iterator_types = []} ins(%subview, %subview_0 : memref<f32, strided<[], offset: ?>>, memref<f32, strided<[], offset: ?>>) outs(%alloc_1 : memref<f32>) {
        ^bb0(%in: f32, %in_3: f32, %out: f32):
          %2 = arith.addf %in, %in_3 : f32
          linalg.yield %2 : f32
        }
        %subview_2 = memref.subview %alloc[%arg2, %arg3] [1, 1] [1, 1] : memref<5x5xf32> to memref<f32, strided<[], offset: ?>>
        memref.copy %alloc_1, %subview_2 : memref<f32> to memref<f32, strided<[], offset: ?>>
      }
    }
    return %alloc : memref<5x5xf32>
  }
}
```
2024-02-07 20:49:27 +01:00


//===- ComposeSubView.cpp - Combining composed subview ops ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains patterns for combining composed subview ops (i.e. subview
// of a subview becomes a single subview).
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/MemRef/Transforms/ComposeSubView.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

using namespace mlir;

namespace {

// Replaces a subview of a subview with a single subview (both static and
// dynamic offsets are supported).
struct ComposeSubViewOpPattern : public OpRewritePattern<memref::SubViewOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(memref::SubViewOp op,
                                PatternRewriter &rewriter) const override {
    // 'op' is the 'SubViewOp' we're rewriting. 'sourceOp' is the op that
    // produces the input of the op we're rewriting (for 'SubViewOp' the input
    // is called the "source" value). We can only combine them if both 'op' and
    // 'sourceOp' are 'SubViewOp'.
    auto sourceOp = op.getSource().getDefiningOp<memref::SubViewOp>();
    if (!sourceOp)
      return failure();

    // A 'SubViewOp' can be "rank-reducing" by eliminating dimensions of the
    // output memref that are statically known to be equal to 1. We do not
    // allow 'sourceOp' to be a rank-reducing subview because then our two
    // 'SubViewOp's would have different numbers of offset/size/stride
    // parameters (just difficult to deal with, not impossible if we end up
    // needing it).
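    // For example, taking a [5, 1] slice of a 'memref<10x10xf32>' can produce
    // a rank-reduced 'memref<5xf32, strided<[10]>>' in which the unit
    // dimension has been dropped.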
    if (sourceOp.getSourceType().getRank() != sourceOp.getType().getRank()) {
      return failure();
    }

    // Offsets, sizes and strides OpFoldResult for the combined 'SubViewOp'.
    SmallVector<OpFoldResult> offsets, sizes, strides,
        opStrides = op.getMixedStrides(),
        sourceStrides = sourceOp.getMixedStrides();
    // The stride of the combined subview in each dimension is the product of
    // the strides of 'sourceOp' and 'op' in that dimension.
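    // For example, a source stride of 2 composed with a stride of 3 in 'op'
    // yields a combined stride of 6 in that dimension.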
    int64_t sourceStrideValue;
    for (auto &&[opStride, sourceStride] :
         llvm::zip(opStrides, sourceStrides)) {
      Attribute opStrideAttr = dyn_cast_if_present<Attribute>(opStride);
      Attribute sourceStrideAttr = dyn_cast_if_present<Attribute>(sourceStride);
      if (!opStrideAttr || !sourceStrideAttr)
        return failure();
      sourceStrideValue = cast<IntegerAttr>(sourceStrideAttr).getInt();
      strides.push_back(rewriter.getI64IntegerAttr(
          cast<IntegerAttr>(opStrideAttr).getInt() * sourceStrideValue));
    }
    // The rules for calculating the new offsets and sizes are:
    // * Multiple subview offsets for a given dimension compose additively.
    //   ("Offset by m and Stride by k" followed by "Offset by n" == "Offset by
    //   m + n * k")
    // * Multiple sizes for a given dimension compose by taking the size of the
    //   final subview and ignoring the rest. ("Take m values" followed by
    //   "Take n values" == "Take n values") This size must also be the
    //   smallest one by definition (a subview needs to be the same size as or
    //   smaller than its source along each dimension; presumably subviews that
    //   are larger than their sources are disallowed by validation).
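    // For example, a source subview with offset 1 and stride 2 followed by a
    // subview with offset 3 addresses element 1 + 3 * 2 = 7 of the original
    // memref, so the combined offset is 7.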
    for (auto &&[opOffset, sourceOffset, sourceStride, opSize] :
         llvm::zip(op.getMixedOffsets(), sourceOp.getMixedOffsets(),
                   sourceOp.getMixedStrides(), op.getMixedSizes())) {
      // We only support static sizes.
      if (opSize.is<Value>()) {
        return failure();
      }
      sizes.push_back(opSize);
      Attribute opOffsetAttr = llvm::dyn_cast_if_present<Attribute>(opOffset),
                sourceOffsetAttr =
                    llvm::dyn_cast_if_present<Attribute>(sourceOffset),
                sourceStrideAttr =
                    llvm::dyn_cast_if_present<Attribute>(sourceStride);
      if (opOffsetAttr && sourceOffsetAttr) {
        // If both offsets are static we can simply calculate the combined
        // offset statically.
        offsets.push_back(rewriter.getI64IntegerAttr(
            cast<IntegerAttr>(opOffsetAttr).getInt() *
                cast<IntegerAttr>(sourceStrideAttr).getInt() +
            cast<IntegerAttr>(sourceOffsetAttr).getInt()));
      } else {
        AffineExpr expr;
        SmallVector<Value> affineApplyOperands;

        // Make 'expr' add 'sourceOffset'.
        if (auto attr = llvm::dyn_cast_if_present<Attribute>(sourceOffset)) {
          expr =
              rewriter.getAffineConstantExpr(cast<IntegerAttr>(attr).getInt());
        } else {
          expr = rewriter.getAffineSymbolExpr(affineApplyOperands.size());
          affineApplyOperands.push_back(sourceOffset.get<Value>());
        }

        // Multiply 'opOffset' by 'sourceStride' and make the 'expr' add the
        // result.
        if (auto attr = llvm::dyn_cast_if_present<Attribute>(opOffset)) {
          expr = expr + cast<IntegerAttr>(attr).getInt() *
                            cast<IntegerAttr>(sourceStrideAttr).getInt();
        } else {
          expr =
              expr + rewriter.getAffineSymbolExpr(affineApplyOperands.size()) *
                         cast<IntegerAttr>(sourceStrideAttr).getInt();
          affineApplyOperands.push_back(opOffset.get<Value>());
        }

        AffineMap map = AffineMap::get(0, affineApplyOperands.size(), expr);
        Value result = rewriter.create<affine::AffineApplyOp>(
            op.getLoc(), map, affineApplyOperands);
        offsets.push_back(result);
      }
    }
    // This replaces 'op' but leaves 'sourceOp' alone; if it no longer has any
    // uses it can be removed by a (separate) dead code elimination pass.
    rewriter.replaceOpWithNewOp<memref::SubViewOp>(
        op, op.getType(), sourceOp.getSource(), offsets, sizes, strides);
    return success();
  }
};
} // namespace
void mlir::memref::populateComposeSubViewPatterns(RewritePatternSet &patterns,
                                                  MLIRContext *context) {
  patterns.add<ComposeSubViewOpPattern>(context);
}
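
For reference, these patterns are normally driven by a greedy rewrite pass (the in-tree `-test-compose-subview` pass does exactly this). Below is a minimal sketch of such a pass; the class name `ComposeSubViewSketchPass` and the argument string `compose-subview-sketch` are illustrative assumptions, not the actual test-pass implementation.
```
// Sketch only: a small pass that greedily applies the compose-subview
// patterns; the class name and argument string here are illustrative.
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/MemRef/Transforms/ComposeSubView.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

namespace {
struct ComposeSubViewSketchPass
    : public mlir::PassWrapper<ComposeSubViewSketchPass,
                               mlir::OperationPass<>> {
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ComposeSubViewSketchPass)

  llvm::StringRef getArgument() const final {
    return "compose-subview-sketch";
  }

  // Dynamic offsets are rewritten into affine.apply ops, so the affine
  // dialect must be declared as a dependency.
  void getDependentDialects(mlir::DialectRegistry &registry) const override {
    registry
        .insert<mlir::affine::AffineDialect, mlir::memref::MemRefDialect>();
  }

  void runOnOperation() override {
    mlir::RewritePatternSet patterns(&getContext());
    mlir::memref::populateComposeSubViewPatterns(patterns, &getContext());
    // Rewrite subview-of-subview chains until a fixed point is reached.
    if (mlir::failed(mlir::applyPatternsAndFoldGreedily(getOperation(),
                                                        std::move(patterns))))
      signalPassFailure();
  }
};
} // namespace
```
Registering the pass (for example with `mlir::PassRegistration`) and wiring it into a tool is omitted here.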