//===- LoopEmitter.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_SPARSETENSORLOOPEMITTER_H_
#define MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_SPARSETENSORLOOPEMITTER_H_

#include <vector>

#include "mlir/Dialect/SparseTensor/IR/Enums.h"
#include "mlir/IR/PatternMatch.h"

namespace mlir {
namespace sparse_tensor {

//===----------------------------------------------------------------------===//
// The LoopEmitter class manages sparse tensors and helps generate the loop
// structure to (co)-iterate sparse tensors.
//
// An example usage:
// To generate the following loops over T1<?x?> and T2<?x?>
//
// for i : TENSOR_1_0 {
//   for j : TENSOR_2_0 {
//     for k : TENSOR_1_1 {}
//     for k : TENSOR_2_1 {}
//   }
// }
//
// One can use
//
// LoopEmitter loopEmitter({T1, T2});
// loopEmitter.initializeLoopEmit();
// loopEmitter.enterLoopOverTensorAtDim(T1, 0);
// loopEmitter.enterLoopOverTensorAtDim(T2, 0);
// loopEmitter.enterLoopOverTensorAtDim(T1, 1);
// loopEmitter.exitCurrentLoop();
// loopEmitter.enterLoopOverTensorAtDim(T2, 1);
// loopEmitter.exitCurrentLoop(); // exit k
// loopEmitter.exitCurrentLoop(); // exit j
// loopEmitter.exitCurrentLoop(); // exit i
//===----------------------------------------------------------------------===//

class LoopEmitter {
public:
  /// Optional callback function to set up dense output tensors when
  /// initializing the loop emitter (e.g., to fill a dense output with zeros).
  using OutputUpdater = function_ref<Value(OpBuilder &builder, Location loc,
                                           Value memref, Value tensor)>;
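
  // A minimal sketch of such an updater, which zero-fills the dense output
  // buffer before the generated loops write into it. The name `zeroFill`,
  // the use of linalg::FillOp, and getElementTypeOrSelf are illustrative
  // assumptions, not part of this class:
  //
  //   auto zeroFill = [](OpBuilder &builder, Location loc, Value memref,
  //                      Value tensor) -> Value {
  //     // Materialize a zero of the output element type and fill the buffer.
  //     Value zero = builder.create<arith::ConstantOp>(
  //         loc, builder.getZeroAttr(getElementTypeOrSelf(tensor)));
  //     builder.create<linalg::FillOp>(loc, ValueRange{zero},
  //                                    ValueRange{memref});
  //     return memref;
  //   };
  //   loopEmitter.initializeLoopEmit(builder, loc, zeroFill);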

  LoopEmitter() = default;

  /// Takes an array of input tensors, over which the generated loops will
  /// iterate. The index of a tensor in the array is also the tensor id
  /// (tid) used in related functions. If isSparseOut is set, the loop emitter
  /// assumes that the sparse output tensor is empty, and will always generate
  /// loops over it based on the dim sizes. An optional array can be provided
  /// (by sparsification) to indicate the loop id sequence that will be
  /// generated. It is used to establish the mapping between an AffineDimExpr
  /// and the corresponding loop index in the loop stack that is maintained by
  /// the loop emitter.
  void initialize(ValueRange tensors, StringAttr loopTag = nullptr,
                  bool hasOutput = false, bool isSparseOut = false,
                  ArrayRef<unsigned> topSort = {});

  explicit LoopEmitter(ValueRange tensors, StringAttr loopTag = nullptr,
                       bool hasOutput = false, bool isSparseOut = false,
                       ArrayRef<unsigned> topSort = {});

  /// Starts a loop emitting session by generating all the buffers needed to
  /// iterate over the tensors.
  void initializeLoopEmit(OpBuilder &builder, Location loc,
                          OutputUpdater updater = nullptr);

  /// Generates a list of operations to compute the given affine expression.
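  /// For example (a sketch of the intended behavior, not a verbatim trace):
  /// given `d0 + d1`, this emits an arith.addi of the two loop induction
  /// variables that d0 and d1 map to; a constant sub-expression becomes an
  /// arith.constant of index type.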
  Value genAffine(OpBuilder &builder, AffineExpr a, Location loc);

  /// Enters a new loop sequence; the loops within the same sequence start
  /// from the break points of the previous loop instead of starting over
  /// from 0.
  /// e.g.,
  /// {
  ///   // Loop sequence start.
  ///   p0 = while(xxx)
  ///     ...
  ///     break p0
  ///
  ///   // Starts loop from p0.
  ///   for (i = p0; i < end; i++)
  ///     ...
  ///   // Loop sequence end.
  /// }
  void enterNewLoopSeq(OpBuilder &builder, Location loc, ArrayRef<size_t> tids,
                       ArrayRef<size_t> dims);

  /// Exits the current loop sequence; this will reset the universal index
  /// to 0.
  void exitCurrentLoopSeq() {
    assert(loopSeqStack.size() == loopStack.size() + 1);
    loopSeqStack.pop_back();
  }

  // TODO: Get rid of `dim` in the argument list? Track the dimension we
  // are currently at internally. Then it would be enterNextDimForTensor.
  // We still need a way to specify the dim for non-annotated dense tensors
  // though, as they can be accessed out of order.
  /// Emits a loop over tensor_tid_dim, assuming that the loops over
  /// tensor_tid_[0, dim - 1] have already been generated.
  /// The function also performs an in-place update on the `reduc` vector to
  /// return the reduction variables used inside the generated loop.
  Operation *enterLoopOverTensorAtDim(OpBuilder &builder, Location loc,
                                      ArrayRef<size_t> tids,
                                      ArrayRef<size_t> dims,
                                      MutableArrayRef<Value> reduc = {},
                                      bool isParallel = false);

  /// Emits a loop that iterates over a filtered dimension with an affine
  /// index expression.
  Operation *enterFilterLoopOverTensorAtDim(OpBuilder &builder, Location loc,
                                            size_t tid, size_t dim,
                                            AffineExpr affine,
                                            MutableArrayRef<Value> reduc = {});

  /// Emits the address computation for a dense dimension at the current
  /// level, using the given affine expression as the index.
  void genDenseAffineAddressAtCurLevel(OpBuilder &builder, Location loc,
                                       size_t tid, size_t dim,
                                       AffineExpr affine);

  /// Emits a co-iteration loop over a set of tensors.
  Operation *enterCoIterationOverTensorsAtDims(
      OpBuilder &builder, Location loc, ArrayRef<size_t> tids,
      ArrayRef<size_t> dims, bool needsUniv, MutableArrayRef<Value> reduc = {});
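
  // A minimal usage sketch for co-iteration, following the example at the
  // top of this file; the builder, rewriter, and loc values are assumed to
  // be available in the surrounding code:
  //
  //   SmallVector<size_t> tids = {0, 1}, dims = {0, 0};
  //   loopEmitter.enterNewLoopSeq(builder, loc, tids, dims);
  //   loopEmitter.enterCoIterationOverTensorsAtDims(builder, loc, tids, dims,
  //                                                 /*needsUniv=*/true);
  //   // ... emit the loop body ...
  //   loopEmitter.exitCurrentLoop(rewriter, loc);
  //   loopEmitter.exitCurrentLoopSeq();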

  /// Exits the current loop, returning any reduction results through the
  /// in-place updated `reduc` vector.
  void exitCurrentLoop(RewriterBase &rewriter, Location loc,
                       MutableArrayRef<Value> reduc = {});
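
  // A sketch of threading a reduction through a generated loop; the
  // surrounding reduction setup (e.g., `sumInit`) is an illustrative
  // assumption:
  //
  //   SmallVector<Value> reduc = {sumInit};
  //   loopEmitter.enterLoopOverTensorAtDim(builder, loc, tids, dims, reduc);
  //   // ... emit the body and store the updated partial sum in reduc[0] ...
  //   loopEmitter.exitCurrentLoop(rewriter, loc, reduc);
  //   Value sum = reduc[0]; // Reduction result of the exited loop.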

  /// Returns the array of coordinates for all the loops generated so far.
  void getCoordinateArray(SmallVectorImpl<Value> &coords) const {
    for (auto &l : loopStack)
      coords.push_back(l.iv);
  }

  /// Gets the current depth of the loop stack (i.e., the number of loops
  /// that are currently alive).
  unsigned getCurrentDepth() const { return loopStack.size(); }

  /// Gets the loop induction variable at the given level.
  Value getLoopIV(size_t level) const {
    if (level < loopStack.size())
      return loopStack[level].iv;
    return nullptr;
  }

  ///
  /// Getters.
  ///
  const std::vector<std::vector<Value>> &getPidxs() const { return pidxs; };
  const std::vector<std::vector<Value>> &getCoord() const { return coord; };
  const std::vector<std::vector<Value>> &getHighs() const { return highs; };
  const std::vector<std::vector<Value>> &getPtrBuffer() const {
    return ptrBuffer;
  };
  const std::vector<std::vector<Value>> &getIdxBuffer() const {
    return idxBuffer;
  };
  const std::vector<Value> &getValBuffer() const { return valBuffer; };

  constexpr static llvm::StringLiteral getLoopEmitterLoopAttrName() {
    return llvm::StringLiteral("Emitted from");
  }

private:
  struct LoopLevelInfo {
    LoopLevelInfo(ArrayRef<size_t> tids, ArrayRef<size_t> dims, Operation *loop,
                  Value iv, StringAttr loopTag)
        : tids(tids), dims(dims), loop(loop), iv(iv) {
      // Attach a special tag to the loops generated by the loop emitter.
      if (loopTag)
        loop->setAttr(LoopEmitter::getLoopEmitterLoopAttrName(), loopTag);
    }
    // TODO: Maybe use a vector<pair> for tid and dim?
    // The set of tensors that the loop is operating on.
    const llvm::SmallVector<size_t> tids;
    // The corresponding dims for the tensors.
    const llvm::SmallVector<size_t> dims;
    const Operation *loop; // The loop operation.
    const Value iv;        // The induction variable for the loop.
  };

  /// Linearizes the address for a dense dimension (i.e., p = (i * d0) + j).
  Value genAddress(OpBuilder &builder, Location loc, size_t tid, size_t dim,
                   Value iv);

  bool isOutputTensor(size_t tid) {
    return hasOutput && tid == tensors.size() - 1;
  }

  bool isSparseOutput(size_t tid) { return isOutputTensor(tid) && isSparseOut; }

  /// Sets up [lo, hi] for iterating tensor[dim]; it assumes that
  /// tensor[0 ... dim - 1] has already been set up.
  void prepareLoopOverTensorAtDim(OpBuilder &builder, Location loc, size_t tid,
                                  size_t dim);

  /// Emits extra locals, since the locals might not be in the simplified
  /// lattice points used to generate the loops, but are still required to
  /// generate expressions.
  void emitExtraLocalsForTensorsAtDenseDims(OpBuilder &builder, Location loc,
                                            ArrayRef<size_t> tids,
                                            ArrayRef<size_t> dims);

  /// Exits a for loop, returning the reduction results, e.g.,
  /// For sequential for loops:
  ///   %ret = for () {
  ///     ...
  ///     %val = addi %args, %c
  ///     yield %val
  ///   }
  /// For parallel loops, the following code generated by users:
  ///   %ret = parallel () init(%args) {
  ///     ...
  ///     %val = op %args, %c
  ///   }
  /// will be transformed into:
  ///   %ret = parallel () init(%args) {
  ///     ...
  ///     scf.reduce(%c) bb0(%0, %1) {
  ///       %val = op %0, %1
  ///       scf.reduce.return %val
  ///     }
  ///   }
  /// NOTE: Only one instruction will be moved into the reduce block; the
  /// transformation will fail if multiple instructions are used to compute
  /// the reduction value. %ret is returned to the user, while %val is
  /// provided by the user (`reduc`).
  void exitForLoop(RewriterBase &rewriter, Location loc,
                   MutableArrayRef<Value> reduc);

  /// Exits a while loop, returning the reduction results.
  void exitCoIterationLoop(OpBuilder &builder, Location loc,
                           MutableArrayRef<Value> reduc);

  /// An optional string attribute that should be attached to the loops
  /// generated by the loop emitter; it might help subsequent passes to
  /// identify loops that operate on sparse tensors more easily.
  StringAttr loopTag;
  /// Whether the loop emitter needs to treat the last tensor as the output
  /// tensor.
  bool hasOutput;
  bool isSparseOut;
  /// Input and (optional) output tensors.
  std::vector<Value> tensors;
  /// The dim type array for each tensor.
  std::vector<std::vector<DimLevelType>> dimTypes;
  /// Sparse iteration information (by tensor and dim). These arrays
  /// are updated to remain current within the current loop.
  std::vector<std::vector<Value>> pidxs;
  std::vector<std::vector<Value>> coord;
  std::vector<std::vector<Value>> highs;
  std::vector<std::vector<Value>> ptrBuffer; // to_pointers
  std::vector<std::vector<Value>> idxBuffer; // to_indices
  std::vector<Value> valBuffer;              // to_value

  // Loop stack, which stores the information of all the nested loops that
  // are alive.
  std::vector<LoopLevelInfo> loopStack;

  // Loop sequence stack, which stores the universal index for the current
  // loop sequence.
  std::vector<Value> loopSeqStack;

  // Maps an AffineDimExpr to the index of the loop in loopStack.
  // TODO: We should probably use a callback function here to make it more
  // general.
  std::vector<unsigned> sparsiferLoopLvlMap;

  // TODO: Not yet used; it should track the current level for each tensor
  // to help eliminate the `dim` parameters from the APIs above.
  // std::vector<size_t> curLv;
};

} // namespace sparse_tensor
} // namespace mlir

#endif // MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_SPARSETENSORLOOPEMITTER_H_