The greedy rewriter is used in many different flows and it has a lot of
convenience (work list management, debugging actions, tracing, etc.). But
it combines two kinds of greedy behavior: 1) how ops are matched, and 2)
folding wherever it can.
These are independent forms of greediness, and combining them leads to
inefficiency. E.g., cases where one needs to create different phases in
lowering and is required to apply patterns in a specific order split across
different passes. Using the driver, one ends up needlessly retrying
folding/having multiple rounds of folding attempts, where one final run
would have sufficed.
Of course folks can locally avoid this behavior by just building their
own, but this is also a commonly requested feature that folks keep on
working around locally in suboptimal ways.
For downstream users, there should be no behavioral change. Updating
from the deprecated `applyPatternsAndFoldGreedily` should just be a find
and replace (e.g., `find ./
-type f -exec sed -i
's|applyPatternsAndFoldGreedily|applyPatternsGreedily|g' {} \;` variety)
as the API arguments haven't changed between the two.
83 lines
2.8 KiB
C++
83 lines
2.8 KiB
C++
//===- XeGPUFoldAliasOps.cpp - XeGPU alias ops folders ----------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "mlir/Dialect/XeGPU/Transforms/Passes.h"
|
|
|
|
#include "mlir/Dialect/Affine/ViewLikeInterfaceUtils.h"
|
|
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
|
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
|
|
#include "mlir/Dialect/XeGPU/Transforms/Transforms.h"
|
|
#include "mlir/Pass/Pass.h"
|
|
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
namespace mlir {
|
|
namespace xegpu {
|
|
#define GEN_PASS_DEF_XEGPUFOLDALIASOPS
|
|
#include "mlir/Dialect/XeGPU/Transforms/Passes.h.inc"
|
|
} // namespace xegpu
|
|
} // namespace mlir
|
|
|
|
// Debug category string for this file; it is also spliced into the prefix
// printed by the DBGS() macro.
#define DEBUG_TYPE "xegpu-fold-alias-ops"
|
|
// Expands to llvm::dbgs() with "[" DEBUG_TYPE "]: " already streamed into it,
// so callers can continue the expression with `<<`.
#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
|
|
|
|
using namespace mlir;
|
|
|
|
namespace {
|
|
/// Merges subview operation with xegpu.create_nd_tdesc operation.
|
|
class XegpuCreateNdDescOpSubViewOpFolder final
|
|
: public OpRewritePattern<xegpu::CreateNdDescOp> {
|
|
public:
|
|
using OpRewritePattern<xegpu::CreateNdDescOp>::OpRewritePattern;
|
|
|
|
LogicalResult matchAndRewrite(xegpu::CreateNdDescOp descOp,
|
|
PatternRewriter &rewriter) const override;
|
|
};
|
|
} // namespace
|
|
|
|
/// Folds a memref.subview feeding `descOp` into the descriptor itself.
///
/// The match fails (reported through notifyMatchFailure) when the descriptor
/// source is not defined by a memref.subview, or when that subview does not
/// have unit strides. Otherwise the subview's offsets, strides and dropped
/// dims are passed together with the descriptor's offsets to
/// affine::resolveIndicesIntoOpWithOffsetsAndStrides, and `descOp` is replaced
/// by a new xegpu::CreateNdDescOp with the same result type, the subview's
/// source, and the resolved offsets.
LogicalResult XegpuCreateNdDescOpSubViewOpFolder::matchAndRewrite(
    xegpu::CreateNdDescOp descOp, PatternRewriter &rewriter) const {
  auto producer = descOp.getSource().getDefiningOp<memref::SubViewOp>();
  if (!producer)
    return rewriter.notifyMatchFailure(descOp, "not a subview producer");
  if (!producer.hasUnitStride())
    return rewriter.notifyMatchFailure(descOp, "requires unit strides");

  // Output buffer filled by the index-resolution helper below.
  SmallVector<Value> foldedOffsets;
  affine::resolveIndicesIntoOpWithOffsetsAndStrides(
      rewriter, descOp.getLoc(), producer.getMixedOffsets(),
      producer.getMixedStrides(), producer.getDroppedDims(),
      descOp.getMixedOffsets(), foldedOffsets);

  // Rebuild the descriptor directly on the subview's source; the result type
  // is carried over from the original op.
  auto descType = descOp.getTensorDesc().getType();
  auto baseSource = producer.getSource();
  rewriter.replaceOpWithNewOp<xegpu::CreateNdDescOp>(
      descOp, descType, baseSource, getAsOpFoldResult(foldedOffsets));
  return success();
}
|
|
|
|
/// Registers the XeGPU alias-op folding patterns into `patterns`; at present
/// this is only XegpuCreateNdDescOpSubViewOpFolder, constructed with the
/// context owned by the pattern set.
void xegpu::populateXeGPUFoldAliasOpsPatterns(RewritePatternSet &patterns) {
  MLIRContext *context = patterns.getContext();
  patterns.add<XegpuCreateNdDescOpSubViewOpFolder>(context);
}
|
|
|
|
namespace {
|
|
|
|
struct XeGPUFoldAliasOpsPass final
|
|
: public xegpu::impl::XeGPUFoldAliasOpsBase<XeGPUFoldAliasOpsPass> {
|
|
void runOnOperation() override;
|
|
};
|
|
|
|
} // namespace
|
|
|
|
void XeGPUFoldAliasOpsPass::runOnOperation() {
|
|
RewritePatternSet patterns(&getContext());
|
|
xegpu::populateXeGPUFoldAliasOpsPatterns(patterns);
|
|
(void)applyPatternsGreedily(getOperation(), std::move(patterns));
|
|
}
|