331 lines
12 KiB
C++
331 lines
12 KiB
C++
//===- CodegenEnv.cpp - Code generation environment class ----------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "CodegenEnv.h"
|
|
|
|
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
|
#include "mlir/Dialect/Linalg/Utils/Utils.h"
|
|
#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h"
|
|
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
|
|
|
#include <optional>
|
|
|
|
using namespace mlir;
|
|
using namespace mlir::sparse_tensor;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Code generation environment helper functions
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Returns true if tensor materializes uninitialized into the computation.
|
|
static bool isMaterializing(Value val) {
|
|
return val.getDefiningOp<tensor::EmptyOp>() ||
|
|
val.getDefiningOp<bufferization::AllocTensorOp>();
|
|
}
|
|
|
|
/// Makes target array's elements sorted according to the `order` array.
|
|
static void sortArrayBasedOnOrder(std::vector<LoopCoeffPair> &target,
|
|
ArrayRef<LoopId> order) {
|
|
std::sort(target.begin(), target.end(),
|
|
[&order](const LoopCoeffPair &l, const LoopCoeffPair &r) {
|
|
assert(std::addressof(l) == std::addressof(r) || l != r);
|
|
int idxL = -1, idxR = -1;
|
|
for (int i = 0, e = order.size(); i < e; i++) {
|
|
if (order[i] == l.first)
|
|
idxL = i;
|
|
if (order[i] == r.first)
|
|
idxR = i;
|
|
}
|
|
assert(idxL >= 0 && idxR >= 0);
|
|
return idxL < idxR;
|
|
});
|
|
}
|
|
//===----------------------------------------------------------------------===//
|
|
// Code generation environment constructor and general methods
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
CodegenEnv::CodegenEnv(linalg::GenericOp linop, SparsificationOptions opts,
|
|
unsigned numTensors, unsigned numLoops,
|
|
unsigned numFilterLoops, unsigned maxRank)
|
|
: linalgOp(linop), sparseOptions(opts),
|
|
latticeMerger(numTensors, numLoops, numFilterLoops, maxRank),
|
|
loopEmitter(), topSort(), sparseOut(nullptr), outerParNest(-1u),
|
|
insChain(), expValues(), expFilled(), expAdded(), expCount(), redVal(),
|
|
redExp(detail::kInvalidId), redCustom(detail::kInvalidId),
|
|
redValidLexInsert() {}
|
|
|
|
LogicalResult CodegenEnv::initTensorExp() {
|
|
// Builds the tensor expression for the Linalg operation in SSA form.
|
|
std::optional<ExprId> optExp = latticeMerger.buildTensorExpFromLinalg(op());
|
|
if (!optExp || !isAdmissibleTensorExp(*optExp))
|
|
return failure();
|
|
|
|
tensorExp = *optExp;
|
|
return success();
|
|
}
|
|
|
|
void CodegenEnv::startEmit() {
|
|
assert(insChain == nullptr && "must only start emitting once");
|
|
if (sparseOut) {
|
|
insChain = sparseOut->get();
|
|
latticeMerger.setHasSparseOut(true);
|
|
}
|
|
|
|
// Sort the related loop array such that they are in the same order as they
|
|
// appears on the topoOrder.
|
|
// TODO: since we only handle affine addition for slice based codegen, and
|
|
// addition is assoicative, the order how we evaluate the expression does
|
|
// not matter. However, to support multiplication, the order of the loop
|
|
// index should match the evaluation order to the affine expression AST.
|
|
|
|
// Initialize loop emitter.
|
|
SmallVector<Value> tensors; // input tensors passed to loop emitter
|
|
for (OpOperand &t : linalgOp->getOpOperands()) {
|
|
tensors.push_back(t.get());
|
|
const TensorId tid = makeTensorId(t.getOperandNumber());
|
|
const Level lvlRank = linalgOp.getMatchingIndexingMap(&t).getNumResults();
|
|
const auto enc = getSparseTensorEncoding(t.get().getType());
|
|
(void)enc;
|
|
assert(!enc || lvlRank == enc.getLvlRank());
|
|
for (Level lvl = 0; lvl < lvlRank; lvl++)
|
|
sortArrayBasedOnOrder(latticeMerger.getDependentLoops(tid, lvl), topSort);
|
|
}
|
|
|
|
loopEmitter.initialize(
|
|
tensors,
|
|
StringAttr::get(linalgOp.getContext(),
|
|
linalg::GenericOp::getOperationName()),
|
|
/*hasOutput=*/true,
|
|
/*isSparseOut=*/sparseOut != nullptr, topSort,
|
|
// TODO: compute the map and pass it to loop emitter directly instead of
|
|
// passing in a callback.
|
|
/*dependentLvlGetter=*/
|
|
[this](TensorId t,
|
|
Level lvl) -> std::vector<std::pair<TensorLevel, unsigned>> {
|
|
// Translates from a list of loop indices to a list of [tid, lvl] pair.
|
|
std::vector<LoopCoeffPair> &rLoops = merger().getDependentLoops(t, lvl);
|
|
std::vector<std::pair<TensorLevel, unsigned>> ret;
|
|
ret.reserve(rLoops.size());
|
|
for (auto [loop, coeff] : rLoops) {
|
|
TensorLevel tl = makeTensorLevel(merger().getLoopDefiningLvl(loop));
|
|
ret.emplace_back(tl, coeff);
|
|
};
|
|
return ret;
|
|
});
|
|
}
|
|
|
|
std::optional<Operation *> CodegenEnv::genLoopBoundary(
|
|
function_ref<std::optional<Operation *>(MutableArrayRef<Value> parameters)>
|
|
callback) {
|
|
SmallVector<Value> params;
|
|
if (isReduc()) {
|
|
params.push_back(redVal);
|
|
if (redValidLexInsert)
|
|
params.push_back(redValidLexInsert);
|
|
} else {
|
|
assert(!redValidLexInsert);
|
|
}
|
|
if (isExpand())
|
|
params.push_back(expCount);
|
|
if (insChain != nullptr)
|
|
params.push_back(insChain);
|
|
auto r = callback(params); // may update parameters
|
|
unsigned i = 0;
|
|
if (isReduc()) {
|
|
// FIXME: This requires `updateExprValue` to perform updates without
|
|
// checking for a previous value; but it's not clear whether that's
|
|
// by design or might be a potential source for bugs.
|
|
updateReduc(params[i++]);
|
|
if (redValidLexInsert)
|
|
setValidLexInsert(params[i++]);
|
|
}
|
|
if (isExpand())
|
|
updateExpandCount(params[i++]);
|
|
if (insChain != nullptr)
|
|
updateInsertionChain(params[i]);
|
|
return r;
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Code generation environment verify functions.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
bool CodegenEnv::isAdmissibleTensorExp(ExprId exp) {
|
|
// We reject any expression that makes a reduction from `-outTensor`, as those
|
|
// expressions create a dependency between the current iteration (i) and the
|
|
// previous iteration (i-1). It would require iterating over the whole
|
|
// coordinate space, which prevent exploiting sparsity for faster code.
|
|
for (utils::IteratorType it : linalgOp.getIteratorTypesArray()) {
|
|
if (it == utils::IteratorType::reduction) {
|
|
if (latticeMerger.hasNegateOnOut(exp))
|
|
return false;
|
|
break;
|
|
}
|
|
}
|
|
|
|
OpOperand *lhs = linalgOp.getDpsInitOperand(0);
|
|
const TensorId tensor = makeTensorId(lhs->getOperandNumber());
|
|
// An non-annotated output tensor is assumed dense, and becomes a random
|
|
// access n-dim memref. Admissible since insertions cannot occur.
|
|
if (getSparseTensorType(lhs->get()).isAllDense())
|
|
return true;
|
|
|
|
// A tensor expression with a sparse output tensor that changes its values
|
|
// but not its nonzero structure, an operation called "simply dynamic" in
|
|
// [Bik96,Ch9], is also admissible without special env.
|
|
if (latticeMerger.isSingleCondition(tensor, exp))
|
|
return true;
|
|
|
|
// Accept "truly dynamic" if the output tensor materializes uninitialized
|
|
// into the computation and insertions occur in lexicographic index order.
|
|
sparseOut = lhs;
|
|
return isMaterializing(lhs->get());
|
|
}
|
|
|
|
bool CodegenEnv::isAdmissibleTopoOrder() {
|
|
if (!hasSparseOutput())
|
|
return true;
|
|
|
|
OpOperand *lhs = linalgOp.getDpsInitOperand(0);
|
|
// Accept "truly dynamic" if the output tensor materializes uninitialized
|
|
// into the computation and insertions occur in lexicographic index order.
|
|
LoopOrd nest = 0;
|
|
const auto iteratorTypes = linalgOp.getIteratorTypesArray();
|
|
assert(topSortSize() == latticeMerger.getNumLoops());
|
|
for (const LoopId i : topSort) {
|
|
if (!latticeMerger.isFilterLoop(i)) {
|
|
// We only count non-filter loops as filter loops should be considered
|
|
// a special type of parallel loops.
|
|
if (linalg::isReductionIterator(iteratorTypes[i]))
|
|
break; // terminate at first reduction
|
|
nest++;
|
|
}
|
|
}
|
|
// Determine admissible dynamic insertion situations:
|
|
// (1) fully injective, since there are no reductions,
|
|
// (2) admissible 1-d expansion in innermost dimension.
|
|
if (static_cast<int64_t>(nest) >= linalgOp.getRank(lhs) - 1) {
|
|
outerParNest = nest;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Code generation environment topological sort methods
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
ArrayRef<LoopId> CodegenEnv::getTopSortSlice(LoopOrd n, LoopOrd m) const {
|
|
return ArrayRef<LoopId>(topSort).slice(n, m);
|
|
}
|
|
|
|
ArrayRef<LoopId> CodegenEnv::getLoopStackUpTo(LoopOrd n) const {
|
|
return ArrayRef<LoopId>(topSort).take_front(n);
|
|
}
|
|
|
|
ArrayRef<LoopId> CodegenEnv::getCurrentLoopStack() const {
|
|
return getLoopStackUpTo(loopEmitter.getCurrentDepth());
|
|
}
|
|
|
|
Value CodegenEnv::getLoopVar(LoopId i) const {
|
|
// TODO: this class should store the inverse of `topSort` so that
|
|
// it can do this conversion directly, instead of searching through
|
|
// `topSort` every time. (Or else, `LoopEmitter` should handle this.)
|
|
for (LoopOrd n = 0, numLoops = topSortSize(); n < numLoops; n++)
|
|
if (topSort[n] == i)
|
|
return loopEmitter.getLoopIV(n);
|
|
llvm_unreachable("invalid loop identifier");
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Code generation environment sparse tensor output and expansion methods
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
void CodegenEnv::updateInsertionChain(Value chain) {
|
|
assert(sparseOut != nullptr && insChain != nullptr);
|
|
insChain = chain;
|
|
}
|
|
|
|
// FIXME: clarify what this "rank" is really supposed to mean/be.
|
|
bool CodegenEnv::atExpandLevel(OpOperand *o, unsigned rank, LoopOrd n) const {
|
|
return sparseOut == o && outerParNest == static_cast<LoopOrd>(rank - 1) &&
|
|
outerParNest == n;
|
|
}
|
|
|
|
void CodegenEnv::startExpand(Value values, Value filled, Value added,
|
|
Value count) {
|
|
assert(sparseOut != nullptr && expValues == nullptr);
|
|
expValues = values;
|
|
expFilled = filled;
|
|
expAdded = added;
|
|
expCount = count;
|
|
}
|
|
|
|
void CodegenEnv::updateExpandCount(Value count) {
|
|
assert(sparseOut != nullptr && expValues != nullptr);
|
|
expCount = count;
|
|
}
|
|
|
|
void CodegenEnv::endExpand() {
|
|
assert(sparseOut != nullptr && expValues != nullptr);
|
|
expValues = expFilled = expAdded = expCount = Value();
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Code generation environment reduction methods
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
void CodegenEnv::startReduc(ExprId exp, Value val) {
|
|
assert(!isReduc() && exp != detail::kInvalidId);
|
|
redExp = exp;
|
|
updateReduc(val);
|
|
}
|
|
|
|
void CodegenEnv::updateReduc(Value val) {
|
|
assert(isReduc());
|
|
redVal = val;
|
|
// NOTE: `genLoopBoundary` requires that this performs a unilateral
|
|
// update without checking for a previous value first. (It's not
|
|
// clear whether any other callsites also require that.)
|
|
latticeMerger.updateExprValue(redExp, val);
|
|
}
|
|
|
|
Value CodegenEnv::endReduc() {
|
|
assert(isReduc());
|
|
Value val = redVal;
|
|
redVal = val;
|
|
latticeMerger.clearExprValue(redExp);
|
|
redExp = detail::kInvalidId;
|
|
return val;
|
|
}
|
|
|
|
void CodegenEnv::setValidLexInsert(Value val) {
|
|
assert(isReduc() && val);
|
|
redValidLexInsert = val;
|
|
}
|
|
|
|
void CodegenEnv::clearValidLexInsert() {
|
|
assert(!isReduc());
|
|
redValidLexInsert = Value();
|
|
}
|
|
|
|
void CodegenEnv::startCustomReduc(ExprId exp) {
|
|
assert(!isCustomReduc() && exp != detail::kInvalidId);
|
|
redCustom = exp;
|
|
}
|
|
|
|
Value CodegenEnv::getCustomRedId() {
|
|
assert(isCustomReduc());
|
|
return dyn_cast<sparse_tensor::ReduceOp>(exp(redCustom).op).getIdentity();
|
|
}
|
|
|
|
void CodegenEnv::endCustomReduc() {
|
|
assert(isCustomReduc());
|
|
redCustom = detail::kInvalidId;
|
|
}
|