[mlir][linalg] Improve implementation of hoist padding.
Instead of relying on ad-hoc bounds calculations, use a projection-based implementation. This simplifies the implementation and finds more static constant sizes than previously.

Differential Revision: https://reviews.llvm.org/D106054
This commit is contained in:
67
mlir/include/mlir/Dialect/Linalg/Analysis/ConstraintsSet.h
Normal file
67
mlir/include/mlir/Dialect/Linalg/Analysis/ConstraintsSet.h
Normal file
@@ -0,0 +1,67 @@
|
||||
//===- ConstraintsSet.h - Extensions for FlatAffineConstraints --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Linalg-specific constraints set extensions.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_LINALG_ANALYSIS_CONSTRAINTS_SET_H_
#define MLIR_DIALECT_LINALG_ANALYSIS_CONSTRAINTS_SET_H_

#include "mlir/Analysis/AffineStructures.h"
#include "mlir/IR/AffineMap.h"

namespace mlir {
class ValueRange;

/// Linalg-specific constraints set extensions.
class ConstraintsSet : public FlatAffineConstraints {
public:
  ConstraintsSet() : FlatAffineConstraints() {}

  /// Assuming `val` is defined by `val = affine.min map (operands)`, introduce
  /// all the constraints `val >= expr_i(operands)`, where expr_i are all the
  /// results of `map`.
  // This API avoids taking a dependence on the AffineMinOp definition.
  LogicalResult composeMin(Value val, AffineMap map, ValueRange operands) {
    return composeMinOrMaxMapAndOperands(val, map, operands, /*min=*/true);
  }

  /// Assuming `val` is defined by `val = affine.max map (operands)`, introduce
  /// all the constraints `val <= expr_i(operands)`, where expr_i are all the
  /// results of `map`.
  // This API avoids taking a dependence on the AffineMaxOp definition.
  LogicalResult composeMax(Value val, AffineMap map, ValueRange operands) {
    return composeMinOrMaxMapAndOperands(val, map, operands, /*min=*/false);
  }

  /// Assuming `val` is defined by `val = affine.apply map (operands)`, call
  /// composeMap.
  // This API avoids taking a dependence on the AffineApplyOp definition.
  LogicalResult composeAffineApply(Value val, AffineMap map,
                                   ValueRange operands);

  /// Asserts the identifier `id` is in the constraints set and returns it.
  unsigned lookupPos(Value id) const;

  /// If v is not in the constraint set, insert it as a dim or symbol depending
  /// on `asDim`.
  /// Return success if v is of dim id type when `asDim` is true and of symbol
  /// id type when `asDim` is false.
  /// Return failure otherwise.
  LogicalResult ensureIdOfType(Value v, bool asDim);

private:
  /// Implementation detail for composeMin/Max.
  LogicalResult composeMinOrMaxMapAndOperands(Value val, AffineMap map,
                                              ValueRange operands, bool min);
};

} // namespace mlir

#endif // MLIR_DIALECT_LINALG_ANALYSIS_CONSTRAINTS_SET_H_
|
||||
@@ -1,12 +1,15 @@
|
||||
add_mlir_dialect_library(MLIRLinalgAnalysis
  ConstraintsSet.cpp
  DependenceAnalysis.cpp

  ADDITIONAL_HEADER_DIRS
  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Linalg

  LINK_LIBS PUBLIC
  MLIRAnalysis
  MLIRIR
  MLIRLinalg
  MLIRLoopAnalysis
  MLIRMemRef
  MLIRStandard
  )
|
||||
|
||||
87
mlir/lib/Dialect/Linalg/Analysis/ConstraintsSet.cpp
Normal file
87
mlir/lib/Dialect/Linalg/Analysis/ConstraintsSet.cpp
Normal file
@@ -0,0 +1,87 @@
|
||||
//===- ConstraintsSet.cpp - Extensions for FlatAffineConstraints ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Linalg-specific constraints set extensions.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Linalg/Analysis/ConstraintsSet.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/IR/AffineMap.h"

using namespace mlir;

unsigned ConstraintsSet::lookupPos(Value id) const {
  unsigned pos;
  if (!findId(id, &pos)) {
    llvm::errs() << "Lookup failed: " << id << "\n";
    llvm_unreachable("Lookup failed");
  }
  return pos;
}

LogicalResult ConstraintsSet::ensureIdOfType(Value v, bool asDim) {
  if (!containsId(v)) {
    // `v` is new: append it at the end of the dim (resp. symbol) section so
    // existing column indices remain valid.
    if (asDim)
      addDimId(getNumDimIds(), v);
    else
      addSymbolId(getNumSymbolIds(), v);
    return success();
  }
  // `v` already exists: succeed only if its position falls in the requested
  // section (dims come first, then symbols).
  unsigned pos = lookupPos(v);
  return success((asDim && pos < getNumDimIds()) ||
                 (!asDim && getNumDimIds() <= pos &&
                  pos < getNumDimIds() + getNumSymbolIds()));
}

LogicalResult ConstraintsSet::composeAffineApply(Value val, AffineMap map,
                                                 ValueRange operands) {
  AffineValueMap avm(map, operands, val);
  return composeMap(&avm);
}

LogicalResult ConstraintsSet::composeMinOrMaxMapAndOperands(Value val,
                                                            AffineMap map,
                                                            ValueRange operands,
                                                            bool min) {
  ConstraintsSet localCst;
  std::vector<SmallVector<int64_t, 8>> flatExprs;
  if (failed(getFlattenedAffineExprs(map, &flatExprs, &localCst)))
    return failure();
  assert(flatExprs.size() == map.getNumResults() &&
         "incorrect number of flattened expressions");

  // Local vars on a per-need basis.
  if (localCst.getNumLocalIds() != 0)
    return failure();

  // Add one inequality for each result connecting `val` to the other ids in
  // `operands`. For instance, if the expression is:
  //   `16 * i0 + i1` and
  //   `min` is true
  // add:
  //   -d_val + 16 * i0 + i1 >= 0.
  for (const auto &flatExpr : flatExprs) {
    assert(flatExpr.size() >= operands.size() + 1);
    SmallVector<int64_t, 8> ineq(getNumCols(), 0);
    for (unsigned i = 0, e = operands.size(); i < e; i++)
      ineq[lookupPos(operands[i])] = min ? flatExpr[i] : -flatExpr[i];

    // Set the coefficient for `d_val`.
    ineq[lookupPos(val)] = min ? -1 : 1;

    // Set the constant term (upper bound in flatExpr is exclusive).
    ineq[getNumCols() - 1] = min ? flatExpr[flatExpr.size() - 1] - 1
                                 : -flatExpr[flatExpr.size() - 1];

    // Add the inequality connecting the result of the map to the rest.
    addInequality(ineq);
  }

  return success();
}
|
||||
@@ -12,8 +12,10 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Dialect/Linalg/Transforms/Hoisting.h"
|
||||
#include "mlir/Analysis/AffineStructures.h"
|
||||
#include "mlir/Analysis/SliceAnalysis.h"
|
||||
#include "mlir/Dialect/Affine/Utils.h"
|
||||
#include "mlir/Dialect/Linalg/Analysis/ConstraintsSet.h"
|
||||
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
|
||||
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
|
||||
#include "mlir/Dialect/SCF/SCF.h"
|
||||
@@ -530,97 +532,6 @@ bool isDefinedOutsideOrConstant(scf::ForOp outer, Value v) {
|
||||
return outer.isDefinedOutsideOfLoop(v) || v.getDefiningOp<ConstantOp>();
|
||||
}
|
||||
|
||||
/// Compute the tightest lower bound with quantities that are all defined
|
||||
/// outside of `outer`.
|
||||
/// Return null if such a bound cannot be computed.
|
||||
Value computeLoopIndependentLowerBound(OpBuilder &b, scf::ForOp outer,
|
||||
Value v) {
|
||||
if (isDefinedOutsideOrConstant(outer, v))
|
||||
return v;
|
||||
return Value();
|
||||
}
|
||||
|
||||
/// Compute the tightest upper bound with quantities that are all defined
|
||||
/// outside of `outer`.
|
||||
/// Expects all ops in the backward slice of `v` up to `outer` to be either
|
||||
/// scf.for, affine.min or affine.apply.
|
||||
static Value computeLoopIndependentUpperBound(OpBuilder &b, scf::ForOp outer,
|
||||
Value v) {
|
||||
if (isDefinedOutsideOrConstant(outer, v))
|
||||
return v;
|
||||
|
||||
LLVM_DEBUG(DBGS() << "Begin loopIndependentUpperBound for: " << v << "\n");
|
||||
|
||||
bool ok =
|
||||
backwardsSliceOnlyHasOpsOfType<scf::ForOp, AffineMinOp, AffineApplyOp>(
|
||||
outer, v);
|
||||
assert(ok && "expected to only be defined by scf::ForOp and AffineMinOp");
|
||||
(void)ok;
|
||||
|
||||
// Compute a backward slice up to, but not including, `outer`.
|
||||
SetVector<Operation *> backwardSlice;
|
||||
getBackwardSlice(v, &backwardSlice,
|
||||
[&](Operation *op) { return outer->isProperAncestor(op); });
|
||||
backwardSlice.insert(v.getDefiningOp());
|
||||
|
||||
OpBuilder::InsertionGuard g(b);
|
||||
b.setInsertionPoint(outer);
|
||||
Value res = v;
|
||||
BlockAndValueMapping bvm;
|
||||
for (Operation *op : backwardSlice) {
|
||||
if (isa<scf::ForOp>(op))
|
||||
continue;
|
||||
if (isa<AffineApplyOp>(op)) {
|
||||
b.clone(*op, bvm);
|
||||
continue;
|
||||
}
|
||||
auto sliceMinOp = cast<AffineMinOp>(op);
|
||||
GetMinMaxExprFn getSCFMinMax = [&](Value value,
|
||||
SmallVectorImpl<Value> &dims,
|
||||
SmallVectorImpl<Value> &symbols) {
|
||||
return getSCFMinMaxExpr(value, dims, symbols, [&](Operation *op) {
|
||||
return outer->isAncestor(op);
|
||||
});
|
||||
};
|
||||
// Perform the substitution of the operands of AffineMinOp.
|
||||
auto mapAndOperands = substituteMin(sliceMinOp, getSCFMinMax);
|
||||
SmallVector<Value> resultOperands = mapAndOperands.dims;
|
||||
llvm::append_range(resultOperands, mapAndOperands.symbols);
|
||||
AffineMap map = mapAndOperands.map;
|
||||
canonicalizeMapAndOperands(&map, &resultOperands);
|
||||
map = simplifyAffineMap(map);
|
||||
res = b.create<AffineMinOp>(
|
||||
outer->getLoc(), map,
|
||||
llvm::to_vector<4>(llvm::map_range(resultOperands, [&](Value operand) {
|
||||
return bvm.lookupOrDefault(operand);
|
||||
})));
|
||||
bvm.map(sliceMinOp, res);
|
||||
}
|
||||
LLVM_DEBUG(DBGS() << "End loopIndependentUpperBound with: " << res << "\n");
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Return the number of iterations in the loop (ub - lb).ceilDiv(step).
|
||||
/// The returned Value is guaranteed not to depend on any loop comprised in
|
||||
/// [`outer`, `forOp`].
|
||||
/// Return null if such a loop-independent quantity cannot be computed.
|
||||
static Value buildLoopTripCount(OpBuilder &b, scf::ForOp outer,
|
||||
scf::ForOp forOp) {
|
||||
MLIRContext *ctx = forOp->getContext();
|
||||
AffineExpr lb, ub, step;
|
||||
bindDims(ctx, lb, ub);
|
||||
bindSymbols(ctx, step);
|
||||
Value lbVal = computeLoopIndependentLowerBound(b, outer, forOp.lowerBound()),
|
||||
ubVal = computeLoopIndependentUpperBound(b, outer, forOp.upperBound()),
|
||||
stepVal = forOp.step();
|
||||
if (!lbVal || !ubVal || !stepVal)
|
||||
return Value();
|
||||
auto loc = forOp->getLoc();
|
||||
Value res = b.create<AffineApplyOp>(loc, (ub - lb).ceilDiv(step),
|
||||
ValueRange{lbVal, ubVal, stepVal});
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Return the current iteration number in the loop (iv - lb).ceilDiv(step).
|
||||
/// The returned Value is guaranteed not to depend on any loop comprised in
|
||||
/// [`outer`, `forOp`].
|
||||
@@ -631,14 +542,135 @@ static Value buildLoopIterationCount(OpBuilder &b, scf::ForOp outer,
|
||||
AffineExpr iv, lb, step;
|
||||
bindDims(ctx, iv, lb);
|
||||
bindSymbols(ctx, step);
|
||||
Value ivVal = forOp.getInductionVar(),
|
||||
lbVal = computeLoopIndependentLowerBound(b, outer, forOp.lowerBound()),
|
||||
stepVal = forOp.step();
|
||||
if (!ivVal || !lbVal || !stepVal)
|
||||
if (!isDefinedOutsideOrConstant(outer, forOp.lowerBound()) ||
|
||||
!isDefinedOutsideOrConstant(outer, forOp.step()))
|
||||
return Value();
|
||||
Value ivVal = forOp.getInductionVar(), lbVal = forOp.lowerBound(),
|
||||
stepVal = forOp.step();
|
||||
auto loc = forOp->getLoc();
|
||||
return b.create<AffineApplyOp>(loc, (iv - lb).ceilDiv(step),
|
||||
ValueRange{ivVal, lbVal, stepVal});
|
||||
return b.createOrFold<AffineApplyOp>(loc, (iv - lb).ceilDiv(step),
|
||||
ValueRange{ivVal, lbVal, stepVal});
|
||||
}
|
||||
|
||||
/// Given a set of loops, assumed to be scf::ForOp, create a constraint set
|
||||
/// containing the inequalities `iv - lb >= 0` and `-iv + ub >= 0` for each
|
||||
/// loop.
|
||||
static ConstraintsSet initLoopIvsAndBounds(ArrayRef<Operation *> loops) {
|
||||
ConstraintsSet constraints;
|
||||
for (Operation *op : loops)
|
||||
constraints.addDimId(constraints.getNumDimIds(),
|
||||
cast<scf::ForOp>(op).getInductionVar());
|
||||
for (Operation *op : loops)
|
||||
constraints.addDimId(constraints.getNumDimIds(),
|
||||
cast<scf::ForOp>(op).lowerBound());
|
||||
for (Operation *op : loops)
|
||||
constraints.addDimId(constraints.getNumDimIds(),
|
||||
cast<scf::ForOp>(op).upperBound());
|
||||
unsigned numLoops = loops.size();
|
||||
for (unsigned ivIdx = 0, e = numLoops; ivIdx < e; ++ivIdx) {
|
||||
// iv - lb >= 0
|
||||
SmallVector<int64_t, 8> ineqLb(constraints.getNumCols(), 0);
|
||||
ineqLb[ivIdx] = 1;
|
||||
ineqLb[ivIdx + numLoops] = -1;
|
||||
// -iv + ub >= 0
|
||||
SmallVector<int64_t, 8> ineqUb(constraints.getNumCols(), 0);
|
||||
ineqUb[ivIdx] = -1;
|
||||
ineqUb[ivIdx + 2 * numLoops] = 1;
|
||||
ineqUb[constraints.getNumCols() - 1] = -1;
|
||||
constraints.addInequality(ineqLb);
|
||||
constraints.addInequality(ineqUb);
|
||||
}
|
||||
return constraints;
|
||||
}
|
||||
|
||||
/// For each loop in `loops`, determine the ops involved in the construction of
|
||||
/// its upper bound---up to the outerLimit loop--- and fold them as new
|
||||
/// inequalities in the constraint set.
|
||||
/// This is achieved by computing the backwardSlice of the loop's upper bound
|
||||
/// and iteratively folding each op in reverse topological order to guarantee
|
||||
/// use-def ordering.
|
||||
/// As operations are folded in, their result is projected out of the
|
||||
/// constraints set.
|
||||
/// The following operations are supported:
|
||||
/// - scf::ForOp are simply skipped.
|
||||
/// - AffineApplyOp are composed to replace the result by an equality.
|
||||
/// - AffineMinOp are composed by adding each entry as an upper bound.
|
||||
/// If any other operation is met, return failure.
|
||||
// TODO: extend on a per-need basis.
|
||||
static LogicalResult
|
||||
foldUpperBoundsIntoConstraintsSet(ConstraintsSet &constraints,
|
||||
scf::ForOp outerLimit,
|
||||
ArrayRef<Operation *> loops) {
|
||||
SetVector<Value> toProjectOut;
|
||||
for (Operation *loop : loops) {
|
||||
auto ub = cast<scf::ForOp>(loop).upperBound();
|
||||
if (isDefinedOutsideOrConstant(outerLimit, ub))
|
||||
continue;
|
||||
|
||||
// Compute a backward slice up to, but not including, `outerLimit`.
|
||||
SetVector<Operation *> backwardSlice;
|
||||
getBackwardSlice(ub, &backwardSlice, [&](Operation *op) {
|
||||
return outerLimit->isProperAncestor(op);
|
||||
});
|
||||
backwardSlice.insert(ub.getDefiningOp());
|
||||
|
||||
// Iterate over all ops in the slice and compose them in the constraints.
|
||||
for (Operation *op : llvm::reverse(backwardSlice)) {
|
||||
if (!isa<scf::ForOp, AffineApplyOp, AffineMinOp>(op))
|
||||
return failure();
|
||||
if (isa<scf::ForOp>(op))
|
||||
continue;
|
||||
// Ensure there is a
|
||||
auto ensureIdFailed = [&](Value v) {
|
||||
return failed(constraints.ensureIdOfType(v, /*asDim=*/true));
|
||||
};
|
||||
|
||||
// Ensure all ids exist and add results for later projection.
|
||||
if (llvm::any_of(op->getResults(), ensureIdFailed) ||
|
||||
llvm::any_of(op->getOperands(), ensureIdFailed))
|
||||
return failure();
|
||||
|
||||
// All supported ops have 1 result.
|
||||
// TODO: extend when needed.
|
||||
toProjectOut.insert(op->getResult(0));
|
||||
|
||||
// Compose supported ops.
|
||||
if (auto affineApplyOp = dyn_cast<AffineApplyOp>(op)) {
|
||||
if (failed(constraints.composeAffineApply(affineApplyOp.getResult(),
|
||||
affineApplyOp.getAffineMap(),
|
||||
affineApplyOp.getOperands())))
|
||||
return failure();
|
||||
continue;
|
||||
}
|
||||
auto affineMinOp = cast<AffineMinOp>(op);
|
||||
if (failed(constraints.composeMin(affineMinOp.getResult(),
|
||||
affineMinOp.getAffineMap(),
|
||||
affineMinOp.operands())))
|
||||
return failure();
|
||||
}
|
||||
}
|
||||
for (Value v : toProjectOut)
|
||||
constraints.projectOut(v);
|
||||
return success();
|
||||
}
|
||||
|
||||
/// Compute dynamic tensor sizes, independent of any value defined inside
|
||||
/// `outer` and such that every n-D iteration of the packingLoops has its own
|
||||
/// space (so that each packed buffer has a storage location). This is achieved
|
||||
/// by computing the extent for each of the packing loops.
|
||||
static LogicalResult computeBounds(scf::ForOp outer,
|
||||
ArrayRef<Operation *> packingLoops,
|
||||
SmallVector<AffineMap> &lbs,
|
||||
SmallVector<AffineMap> &ubs) {
|
||||
// Packing loop IVs are introduced as the first positions.
|
||||
ConstraintsSet constraints = initLoopIvsAndBounds(packingLoops);
|
||||
if (failed(
|
||||
foldUpperBoundsIntoConstraintsSet(constraints, outer, packingLoops)))
|
||||
return failure();
|
||||
// Compute the bounds of the first positions, assuming the others are fixed.
|
||||
constraints.getSliceBounds(/*pos=*/0, /*num=*/packingLoops.size(),
|
||||
outer->getContext(), &lbs, &ubs);
|
||||
return success();
|
||||
}
|
||||
|
||||
/// Ensure prerequisites that guarantee pad op hoisting can occur.
|
||||
@@ -725,28 +757,49 @@ hoistPaddingOnTensorsPrerequisites(linalg::PadTensorOp padTensorOp, int nLevels,
|
||||
assert(outermostEnclosingForOp == backwardSlice.front());
|
||||
|
||||
scf::ForOp outer = cast<scf::ForOp>(outermostEnclosingForOp);
|
||||
if (llvm::any_of(packingLoops, [&](Operation *op) {
|
||||
scf::ForOp forOp = cast<scf::ForOp>(op);
|
||||
Value lb = forOp.lowerBound(), ub = forOp.upperBound(),
|
||||
step = forOp.step();
|
||||
return !isDefinedOutsideOrConstant(outer, lb) ||
|
||||
!(isDefinedOutsideOrConstant(outer, ub) ||
|
||||
backwardsSliceOnlyHasOpsOfType<scf::ForOp, AffineMinOp,
|
||||
AffineApplyOp>(outer, ub)) ||
|
||||
!isDefinedOutsideOrConstant(outer, step);
|
||||
}))
|
||||
|
||||
ConstraintsSet constraints = initLoopIvsAndBounds(packingLoops.getArrayRef());
|
||||
if (failed(foldUpperBoundsIntoConstraintsSet(constraints, outer,
|
||||
packingLoops.getArrayRef())))
|
||||
return failure();
|
||||
|
||||
unsigned numLoops = packingLoops.size();
|
||||
SmallVector<AffineMap> lbs(numLoops), ubs(numLoops);
|
||||
if (failed(computeBounds(outer, packingLoops.getArrayRef(), lbs, ubs)))
|
||||
return failure();
|
||||
|
||||
SmallVector<Value> allValues;
|
||||
constraints.getAllIdValues(&allValues);
|
||||
SmallVector<Value> allNonLoopValues(allValues.begin() + numLoops,
|
||||
allValues.end());
|
||||
|
||||
// For each packingLoop, create the extent by (ub - lb).ceilDiv(step).
|
||||
// IP just before the outermost loop considered that we hoist above.
|
||||
OpBuilder b(outermostEnclosingForOp);
|
||||
dynamicTensorSizes =
|
||||
llvm::to_vector<4>(llvm::map_range(packingLoops, [&](Operation *op) {
|
||||
return buildLoopTripCount(b, cast<scf::ForOp>(outermostEnclosingForOp),
|
||||
cast<scf::ForOp>(op));
|
||||
}));
|
||||
// Assert all loop trip counts can be computed.
|
||||
if (!llvm::all_of(dynamicTensorSizes, [](Value v) { return v; }))
|
||||
llvm_unreachable("loop independence prerequisite not met");
|
||||
ImplicitLocOpBuilder b(outer->getLoc(), outer);
|
||||
assert(packingLoops.size() == lbs.size() && "expected matching lb sizes");
|
||||
assert(packingLoops.size() == ubs.size() && "expected matching ub sizes");
|
||||
for (auto it : llvm::zip(packingLoops, lbs, ubs)) {
|
||||
scf::ForOp loop = cast<scf::ForOp>(std::get<0>(it));
|
||||
AffineMap lbMap = std::get<1>(it);
|
||||
AffineMap ubMap = std::get<2>(it);
|
||||
SmallVector<Value> lbOperands(allNonLoopValues);
|
||||
canonicalizeMapAndOperands(&lbMap, &lbOperands);
|
||||
Value lbVal = b.createOrFold<AffineMaxOp>(lbMap, lbOperands);
|
||||
|
||||
SmallVector<Value> ubOperands(allNonLoopValues);
|
||||
canonicalizeMapAndOperands(&ubMap, &ubOperands);
|
||||
Value ubVal = b.createOrFold<AffineMinOp>(ubMap, ubOperands);
|
||||
|
||||
AffineExpr lb, ub, step;
|
||||
bindDims(b.getContext(), lb, ub);
|
||||
bindSymbols(b.getContext(), step);
|
||||
Value res = b.createOrFold<AffineApplyOp>(
|
||||
(ub - lb).ceilDiv(step),
|
||||
ValueRange{lbVal, ubVal, cast<scf::ForOp>(loop).step()});
|
||||
|
||||
dynamicTensorSizes.push_back(res);
|
||||
}
|
||||
|
||||
return success();
|
||||
}
|
||||
|
||||
|
||||
@@ -141,8 +141,10 @@ func @matmul_tensors(
|
||||
|
||||
// -----
|
||||
|
||||
|
||||
// CHECK-DAG: #[[$MIN_REST8:[0-9a-z]+]] = affine_map<(d0)[s0] -> (8, -d0 + s0)>
|
||||
// CHECK-DAG: #[[$MIN_MOD4:[0-9a-z]+]] = affine_map<(d0) -> (4, d0 - ((d0 - 1) floordiv 4) * 4)>
|
||||
// CHECK-DAG: #[[$MIN_REST4:[0-9a-z]+]] = affine_map<(d0, d1) -> (4, d0 - d1)>
|
||||
// CHECK-DAG: #[[$MIN_REST2:[0-9a-z]+]] = affine_map<(d0, d1) -> (2, d0 - d1)>
|
||||
// CHECK-DAG: #[[$DIV4:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 4)>
|
||||
// CHECK-DAG: #[[$DIV2:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 2)>
|
||||
#map0 = affine_map<(d0)[s0] -> (8, -d0 + s0)>
|
||||
@@ -167,20 +169,18 @@ func @dot(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %arg2: tensor<f32>)
|
||||
//
|
||||
// CHECK: %[[MR8:.*]] = affine.min #[[$MIN_REST8]](%[[I]])
|
||||
// CHECK: %[[D0:.*]] = affine.apply #[[$DIV4]](%[[MR8]])
|
||||
// CHECK: %[[MM4:.*]] = affine.min #[[$MIN_MOD4]](%[[MR8]])
|
||||
// CHECK: %[[D1:.*]] = affine.apply #[[$DIV2]](%[[MM4]])
|
||||
// Init tensor and pack.
|
||||
// CHECK: %[[INIT_PACKED_A:.*]] = linalg.init_tensor [%[[D0]], %[[D1]], 2] : tensor<?x?x2xf32>
|
||||
// CHECK: %[[PACKED_A:.*]] = scf.for %[[II:[0-9a-z]+]] = {{.*}} iter_args(%{{.*}} = %[[INIT_PACKED_A]]) -> (tensor<?x?x2xf32>) {
|
||||
// CHECK: %[[INIT_PACKED_A:.*]] = linalg.init_tensor [%[[D0]], 2, 2] : tensor<?x2x2xf32>
|
||||
// CHECK: %[[CAST_INIT_PACKED_A:.*]] = tensor.cast %[[INIT_PACKED_A]] : tensor<?x2x2xf32> to tensor<?x?x2xf32>
|
||||
// CHECK: %[[PACKED_A:.*]] = scf.for %[[II:[0-9a-z]+]] = {{.*}} iter_args(%{{.*}} = %[[CAST_INIT_PACKED_A]]) -> (tensor<?x?x2xf32>) {
|
||||
// CHECK: scf.for %[[III:[0-9a-z]+]] =
|
||||
// CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor<?x?x2xf32>
|
||||
//
|
||||
// CHECK: %[[D0_2:.*]] = affine.apply #[[$DIV4]](%[[MR8]])
|
||||
// CHECK: %[[MM4_2:.*]] = affine.min #[[$MIN_MOD4]](%[[MR8]])
|
||||
// CHECK: %[[D1_2:.*]] = affine.apply #[[$DIV2]](%[[MM4_2]])
|
||||
// Init tensor and pack.
|
||||
// CHECK: %[[INIT_PACKED_B:.*]] = linalg.init_tensor [%[[D0_2]], %[[D1_2]], 2] : tensor<?x?x2xf32>
|
||||
// CHECK: %[[PACKED_B:.*]] = scf.for %[[II_2:[0-9a-z]+]] = {{.*}} iter_args(%{{.*}} = %[[INIT_PACKED_B]]) -> (tensor<?x?x2xf32>) {
|
||||
// CHECK: %[[INIT_PACKED_B:.*]] = linalg.init_tensor [%[[D0_2]], 2, 2] : tensor<?x2x2xf32>
|
||||
// CHECK: %[[CAST_INIT_PACKED_B:.*]] = tensor.cast %[[INIT_PACKED_B]] : tensor<?x2x2xf32> to tensor<?x?x2xf32>
|
||||
// CHECK: %[[PACKED_B:.*]] = scf.for %[[II_2:[0-9a-z]+]] = {{.*}} iter_args(%{{.*}} = %[[CAST_INIT_PACKED_B]]) -> (tensor<?x?x2xf32>) {
|
||||
// CHECK: scf.for %[[III_2:[0-9a-z]+]] =
|
||||
// CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor<?x?x2xf32>
|
||||
// Compute.
|
||||
|
||||
Reference in New Issue
Block a user