//===- CodegenUtils.cpp - Utilities for generating MLIR -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "CodegenUtils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/Types.h"
#include "mlir/IR/Value.h"
using namespace mlir;
using namespace mlir::sparse_tensor;
/// Generates a pointer/index load from the sparse storage scheme. Narrower
/// data types need to be zero extended before casting the value into the
/// index type used for looping and indexing.
static Value genIndexLoad(OpBuilder &builder, Location loc, Value ptr,
Value s) {
  // For the scalar case, we simply zero extend narrower indices into 64-bit
  // values before casting to index without a performance penalty. Note,
  // however, that even 64-bit indices in theory cannot express the full
  // range of the index type.
Value load = builder.create<memref::LoadOp>(loc, ptr, s);
if (!load.getType().isa<IndexType>()) {
if (load.getType().getIntOrFloatBitWidth() < 64)
load = builder.create<arith::ExtUIOp>(loc, builder.getI64Type(), load);
load =
builder.create<arith::IndexCastOp>(loc, builder.getIndexType(), load);
}
return load;
}
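// For illustration (a sketch assuming an i32 overhead type and hypothetical
// SSA names), genIndexLoad emits roughly:
//   %0 = memref.load %ptr[%s] : memref<?xi32>
//   %1 = arith.extui %0 : i32 to i64
//   %2 = arith.index_cast %1 : i64 to index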
/// If the tensor is a sparse constant, generates and returns the pair of
/// the constants for the indices and the values.
static Optional<std::pair<Value, Value>>
genSplitSparseConstant(OpBuilder &builder, Location loc, Value tensor) {
if (auto constOp = tensor.getDefiningOp<arith::ConstantOp>()) {
if (auto attr = constOp.getValue().dyn_cast<SparseElementsAttr>()) {
DenseElementsAttr indicesAttr = attr.getIndices();
Value indices = builder.create<arith::ConstantOp>(loc, indicesAttr);
DenseElementsAttr valuesAttr = attr.getValues();
Value values = builder.create<arith::ConstantOp>(loc, valuesAttr);
return std::make_pair(indices, values);
}
}
return {};
}
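// For illustration, a sparse constant such as
//   %t = arith.constant sparse<[[0, 0], [1, 2]], [1.0, 2.0]> : tensor<2x3xf64>
// is roughly split into two dense constants (indices and values):
//   %indices = arith.constant dense<[[0, 0], [1, 2]]> : tensor<2x2xi64>
//   %values  = arith.constant dense<[1.0, 2.0]> : tensor<2xf64>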
/// Generates the code to copy the indices at indices[ivs] into indicesArray,
/// and returns the value at values[ivs].
static Value genIndexAndValueForSparse(OpBuilder &builder, Location loc,
Value indices, Value values,
SmallVectorImpl<Value> &indicesArray,
ValueRange ivs, unsigned rank) {
for (unsigned i = 0; i < rank; i++) {
Value idx = constantIndex(builder, loc, i);
Value val = builder.create<tensor::ExtractOp>(loc, indices,
ValueRange{ivs[0], idx});
val = builder.create<arith::IndexCastOp>(loc, builder.getIndexType(), val);
indicesArray.push_back(val);
}
return builder.create<tensor::ExtractOp>(loc, values, ivs[0]);
}
/// Generates the code to read the value from tensor[ivs], and conditionally
/// captures the indices ivs. The generated code looks like the following,
/// and the insertion point after this routine is inside the if-then branch
/// behind the assignment to the indices. This ensures that any code using
/// the indices, such as an addEltX call generated afterwards, is inside the
/// if-then branch.
///    if (tensor[ivs] != 0)
///      indicesArray = ivs
static Value genIndexAndValueForDense(OpBuilder &builder, Location loc,
Value tensor,
SmallVectorImpl<Value> &indicesArray,
ValueRange ivs) {
Value val = genValueForDense(builder, loc, tensor, ivs);
indicesArray.append(ivs.begin(), ivs.end());
return val;
}
//===----------------------------------------------------------------------===//
// Sparse tensor loop emitter class implementations
//===----------------------------------------------------------------------===//
SparseTensorLoopEmitter::SparseTensorLoopEmitter(ValueRange tensors,
bool hasOutput,
bool isSparseOut)
: hasOutput(hasOutput), isSparseOut(isSparseOut),
tensors(tensors.begin(), tensors.end()), dimTypes(tensors.size()),
pidxs(tensors.size()), coord(tensors.size()), highs(tensors.size()),
ptrBuffer(tensors.size()), idxBuffer(tensors.size()),
valBuffer(tensors.size()), loopStack() {
for (size_t tid = 0, e = tensors.size(); tid < e; tid++) {
auto t = tensors[tid];
    // Skip a scalar or zero-dimensional tensor.
if (isZeroRankedTensorOrScalar(t.getType()))
continue;
auto rtp = t.getType().cast<RankedTensorType>();
auto rank = static_cast<size_t>(rtp.getRank());
auto enc = getSparseTensorEncoding(rtp);
    // We always treat a sparse output tensor as dense, so that it is always
    // iterated based on the dimension sizes.
if (enc && !(isOutputTensor(tid) && isSparseOut))
for (auto dimTp : enc.getDimLevelType())
dimTypes[tid].push_back(dimTp);
else
dimTypes[tid].assign(rank, DimLevelType::Dense);
// Initialize using empty value.
pidxs[tid].assign(rank, Value());
coord[tid].assign(rank, Value());
highs[tid].assign(rank, Value());
ptrBuffer[tid].assign(rank, Value());
idxBuffer[tid].assign(rank, Value());
}
}
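// A minimal usage sketch of the emitter lifecycle (hypothetical call site,
// not from this file), assuming suitable `builder`, `loc`, and index data:
//   SparseTensorLoopEmitter emitter(tensors, /*hasOutput=*/true,
//                                   /*isSparseOut=*/false);
//   emitter.initializeLoopEmit(builder, loc, updater);
//   emitter.enterNewLoopSeq(builder, loc, tids, dims);
//   emitter.enterLoopOverTensorAtDim(builder, loc, tid, dim, reduc);
//   // ... emit the loop body ...
//   emitter.exitCurrentLoop(builder, loc, reduc);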
void SparseTensorLoopEmitter::initializeLoopEmit(
OpBuilder &builder, Location loc,
SparseTensorLoopEmitter::OutputUpdater updater) {
// For every tensor, find lower and upper bound on dimensions, set the
// same bounds on loop indices, and obtain dense or sparse buffer(s).
for (size_t t = 0, e = tensors.size(); t < e; t++) {
auto tensor = tensors[t];
auto rtp = tensor.getType().dyn_cast<RankedTensorType>();
if (!rtp)
      // Skip only scalars; a zero-ranked tensor still needs to be bufferized
      // and (probably) filled with zeros by its users.
continue;
auto rank = rtp.getRank();
auto shape = rtp.getShape();
auto enc = getSparseTensorEncoding(rtp);
auto dynShape = {ShapedType::kDynamicSize};
// Scan all dimensions of current tensor.
for (int64_t d = 0; d < rank; d++) {
      // This should be called only once at the beginning.
assert(!ptrBuffer[t][d] && !idxBuffer[t][d] && !highs[t][d]);
// Handle sparse storage schemes.
if (isCompressedDLT(dimTypes[t][d])) {
auto ptrTp =
MemRefType::get(dynShape, getPointerOverheadType(builder, enc));
auto indTp =
MemRefType::get(dynShape, getIndexOverheadType(builder, enc));
auto dim = builder.getIndexAttr(d);
        // Generate sparse primitives to obtain the pointer and index buffers.
ptrBuffer[t][d] = builder.create<ToPointersOp>(loc, ptrTp, tensor, dim);
idxBuffer[t][d] = builder.create<ToIndicesOp>(loc, indTp, tensor, dim);
} else if (isSingletonDLT(dimTypes[t][d])) {
// Singleton dimension, fetch indices.
auto indTp =
MemRefType::get(dynShape, getIndexOverheadType(builder, enc));
auto dim = builder.getIndexAttr(d);
idxBuffer[t][d] = builder.create<ToIndicesOp>(loc, indTp, tensor, dim);
} else {
// Dense dimension, nothing to fetch.
assert(isDenseDLT(dimTypes[t][d]));
}
// Find upper bound in current dimension.
unsigned p = toOrigDim(enc, d);
Value up = mlir::linalg::createOrFoldDimOp(builder, loc, tensor, p);
highs[t][d] = up;
}
    // Perform the required bufferization. Dense inputs materialize from the
    // input tensors. Sparse inputs use sparse primitives to obtain the
    // values. Extra output initialization is delegated to clients.
bool isOutput = isOutputTensor(t);
Type elementType = rtp.getElementType();
if (!enc) {
// Non-annotated dense tensors.
auto denseTp = MemRefType::get(shape, elementType);
Value denseVal =
builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
// Dense outputs need special handling.
if (isOutput && updater)
denseVal = updater(builder, loc, denseVal, tensor);
valBuffer[t] = denseVal;
} else {
// Annotated sparse tensors.
      // We also need the value buffer for an annotated all-dense
      // `sparse` tensor.
auto dynShape = {ShapedType::kDynamicSize};
auto sparseTp = MemRefType::get(dynShape, elementType);
valBuffer[t] = builder.create<ToValuesOp>(loc, sparseTp, tensor);
}
    // NOTE: we could also prepare for dimension 0 here in advance. This would
    // hoist some loop preparation out of tensor iteration, but would also
    // (undesirably) hoist the code outside if-conditions.
}
}
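// For illustration, for an annotated 2-d tensor %t with a compressed inner
// dimension, this roughly materializes the following buffers (a sketch with
// hypothetical names; the exact assembly may differ):
//   %ptr = sparse_tensor.pointers %t {dimension = 1 : index}
//            : tensor<8x8xf64, #CSR> to memref<?xindex>
//   %idx = sparse_tensor.indices %t {dimension = 1 : index}
//            : tensor<8x8xf64, #CSR> to memref<?xindex>
//   %val = sparse_tensor.values %t : tensor<8x8xf64, #CSR> to memref<?xf64>
// while a non-annotated dense tensor is simply bufferized:
//   %buf = bufferization.to_memref %d : memref<8x8xf64>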
void SparseTensorLoopEmitter::enterNewLoopSeq(OpBuilder &builder, Location loc,
ArrayRef<size_t> tids,
ArrayRef<size_t> dims) {
  assert(loopSeqStack.size() == loopStack.size());
  // The universal index starts from 0.
loopSeqStack.emplace_back(constantIndex(builder, loc, 0));
// Prepares for all the tensors used in the current loop sequence.
for (auto [tid, dim] : llvm::zip(tids, dims))
prepareLoopOverTensorAtDim(builder, loc, tid, dim);
}
Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim(
OpBuilder &builder, Location loc, size_t tid, size_t dim,
MutableArrayRef<Value> reduc, bool isParallel, ArrayRef<size_t> extraTids,
ArrayRef<size_t> extraDims) {
assert(dimTypes[tid].size() > dim);
  // We cannot re-enter the same level.
assert(!coord[tid][dim]);
Value step = constantIndex(builder, loc, 1);
auto dimType = dimTypes[tid][dim];
bool isSparseInput = isCompressedDLT(dimType) || isSingletonDLT(dimType);
assert(isDenseDLT(dimType) || isCompressedDLT(dimType) ||
isSingletonDLT(dimType));
  Value lo = isSparseInput ? pidxs[tid][dim]   // current offset
                           : loopSeqStack.back(); // universal index
Value hi = highs[tid][dim];
scf::ForOp forOp = builder.create<scf::ForOp>(loc, lo, hi, step, reduc);
builder.setInsertionPointToStart(forOp.getBody());
Value iv = forOp.getInductionVar();
assert(iv);
if (isSparseInput) {
pidxs[tid][dim] = iv;
// Generating a load on the indices array yields the coordinate.
Value ptr = idxBuffer[tid][dim];
coord[tid][dim] = genIndexLoad(builder, loc, ptr, iv);
  } else {
    // Dense tensor: the coordinate is the induction variable.
    coord[tid][dim] = iv;
    // Generate pidx for the dense dim (pidx = i * sz + j).
auto enc = getSparseTensorEncoding(tensors[tid].getType());
if (enc && !isSparseOutput(tid))
pidxs[tid][dim] = genAddress(builder, loc, tid, dim, iv);
}
  // NOTE: we could also prepare for the next dim here in advance.
  // Push the loop onto the stack.
loopStack.emplace_back(ArrayRef<size_t>(tid), ArrayRef<size_t>(dim), forOp,
coord[tid][dim]);
// Emit extra locals.
emitExtraLocalsForTensorsAtDenseDims(builder, loc, extraTids, extraDims);
// In-place update on the reduction variable vector.
assert(forOp.getNumRegionIterArgs() == reduc.size());
for (int i = 0, e = reduc.size(); i < e; i++)
reduc[i] = forOp.getRegionIterArg(i);
return forOp;
}
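// For illustration, for a compressed dimension this emits roughly the
// following loop (a sketch with hypothetical names):
//   scf.for %iv = %pLo to %pHi step %c1 iter_args(%red = %init) -> (f64) {
//     %coord = memref.load %idxBuffer[%iv] : memref<?xindex>
//     // ... loop body updating %red ...
//   }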
Operation *SparseTensorLoopEmitter::enterCoIterationOverTensorsAtDims(
OpBuilder &builder, Location loc, ArrayRef<size_t> tids,
ArrayRef<size_t> dims, bool needsUniv, MutableArrayRef<Value> reduc,
ArrayRef<size_t> extraTids, ArrayRef<size_t> extraDims) {
assert(tids.size() == dims.size());
SmallVector<Type, 4> types;
SmallVector<Value, 4> operands;
// Construct the while-loop with a parameter for each index.
Type indexType = builder.getIndexType();
for (auto [tid, dim] : llvm::zip(tids, dims)) {
if (isCompressedDLT(dimTypes[tid][dim]) ||
isSingletonDLT(dimTypes[tid][dim])) {
assert(pidxs[tid][dim]);
types.push_back(indexType);
operands.push_back(pidxs[tid][dim]);
}
}
  // The position where the user-supplied reduction variables start.
for (Value rec : reduc) {
types.push_back(rec.getType());
operands.push_back(rec);
}
if (needsUniv) {
types.push_back(indexType);
    // Append the universal index.
operands.push_back(loopSeqStack.back());
}
assert(types.size() == operands.size());
scf::WhileOp whileOp = builder.create<scf::WhileOp>(loc, types, operands);
SmallVector<Location> locs(types.size(), loc);
Block *before = builder.createBlock(&whileOp.getBefore(), {}, types, locs);
Block *after = builder.createBlock(&whileOp.getAfter(), {}, types, locs);
// Build the "before" region, which effectively consists
// of a conjunction of "i < upper" tests on all induction.
builder.setInsertionPointToStart(&whileOp.getBefore().front());
Value cond;
unsigned o = 0;
for (auto [tid, dim] : llvm::zip(tids, dims)) {
if (isCompressedDLT(dimTypes[tid][dim]) ||
isSingletonDLT(dimTypes[tid][dim])) {
Value op1 = before->getArgument(o);
Value op2 = highs[tid][dim];
Value opc = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::ult,
op1, op2);
cond = cond ? builder.create<arith::AndIOp>(loc, cond, opc) : opc;
      // Update the position index (pidx).
pidxs[tid][dim] = after->getArgument(o++);
}
}
builder.create<scf::ConditionOp>(loc, cond, before->getArguments());
  // Generates the while-loop body.
builder.setInsertionPointToStart(&whileOp.getAfter().front());
Value min;
for (auto [tid, dim] : llvm::zip(tids, dims)) {
    // Prepares for the next level.
if (isCompressedDLT(dimTypes[tid][dim]) ||
isSingletonDLT(dimTypes[tid][dim])) {
Value ptr = idxBuffer[tid][dim];
Value s = pidxs[tid][dim];
Value load = genIndexLoad(builder, loc, ptr, s);
coord[tid][dim] = load;
if (!needsUniv) {
if (min) {
Value cmp = builder.create<arith::CmpIOp>(
loc, arith::CmpIPredicate::ult, load, min);
min = builder.create<arith::SelectOp>(loc, cmp, load, min);
} else {
min = load;
}
}
}
}
if (needsUniv) {
assert(!min);
    // The universal index is the minimal pidx.
min = after->getArguments().back();
}
for (auto [tid, dim] : llvm::zip(tids, dims)) {
    // All dense dims (as well as the sparse output tensor) share the same
    // pidx in the while loop.
if (isDenseDLT(dimTypes[tid][dim])) {
pidxs[tid][dim] = min;
      // Generate pidx for the dense dim (pidx = i * sz + j).
auto enc = getSparseTensorEncoding(tensors[tid].getType());
if (enc && !isSparseOutput(tid))
pidxs[tid][dim] = genAddress(builder, loc, tid, dim, min);
}
    // NOTE: we could also prepare for the next dim here in advance.
}
// Sets up the loop stack.
loopStack.emplace_back(tids, dims, whileOp, min);
assert(loopStack.size() == loopSeqStack.size());
// Emits extra locals
emitExtraLocalsForTensorsAtDenseDims(builder, loc, extraTids, extraDims);
// Updates reduction variables
assert(after->getNumArguments() == o + reduc.size() + (needsUniv ? 1 : 0));
// In-place update on reduction variable.
for (unsigned i = 0, e = reduc.size(); i < e; i++)
reduc[i] = after->getArgument(o + i);
return whileOp;
}
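// For illustration, co-iterating two sparse tensors yields a skeleton
// roughly like (a sketch with hypothetical names):
//   %r:2 = scf.while (%p0 = %s0, %p1 = %s1)
//       : (index, index) -> (index, index) {
//     %c0 = arith.cmpi ult, %p0, %hi0 : index
//     %c1 = arith.cmpi ult, %p1, %hi1 : index
//     %cond = arith.andi %c0, %c1 : i1
//     scf.condition(%cond) %p0, %p1 : index, index
//   } do {
//   ^bb0(%p0: index, %p1: index):
//     // ... load coordinates, compute the minimum, emit the body ...
//     scf.yield %np0, %np1 : index, index
//   }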
void SparseTensorLoopEmitter::prepareLoopOverTensorAtDim(OpBuilder &builder,
Location loc,
size_t tid,
size_t dim) {
assert(dimTypes[tid].size() > dim);
auto dimType = dimTypes[tid][dim];
if (isDenseDLT(dimType))
return;
  // Either this is the first dimension, or the previous dimension
  // has already been set.
assert(dim == 0 || pidxs[tid][dim - 1]);
Value c0 = constantIndex(builder, loc, 0);
Value c1 = constantIndex(builder, loc, 1);
if (isCompressedDLT(dimType)) {
Value ptr = ptrBuffer[tid][dim];
Value pLo = dim == 0 ? c0 : pidxs[tid][dim - 1];
pidxs[tid][dim] = genIndexLoad(builder, loc, ptr, pLo);
Value pHi = builder.create<arith::AddIOp>(loc, pLo, c1);
highs[tid][dim] = genIndexLoad(builder, loc, ptr, pHi);
return;
}
if (isSingletonDLT(dimType)) {
Value pLo = dim == 0 ? c0 : pidxs[tid][dim - 1];
Value pHi = builder.create<arith::AddIOp>(loc, pLo, c1);
pidxs[tid][dim] = pLo;
highs[tid][dim] = pHi;
return;
}
llvm_unreachable("Unrecognizable dimesion type!");
}
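// For illustration, for a compressed dimension the pointer buffer delimits
// the iteration space (a sketch with hypothetical names):
//   %pLo = memref.load %ptrBuffer[%parent] : memref<?xindex>
//   %p1  = arith.addi %parent, %c1 : index
//   %pHi = memref.load %ptrBuffer[%p1] : memref<?xindex>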
void SparseTensorLoopEmitter::emitExtraLocalsForTensorsAtDenseDims(
OpBuilder &builder, Location loc, ArrayRef<size_t> tids,
ArrayRef<size_t> dims) {
// Initialize dense positions. Note that we generate dense indices of the
// output tensor unconditionally, since they may not appear in the lattice,
// but may be needed for linearized codegen.
for (auto [tid, dim] : llvm::zip(tids, dims)) {
assert(isDenseDLT(dimTypes[tid][dim]));
auto enc = getSparseTensorEncoding(tensors[tid].getType());
if (enc && !isSparseOutput(tid)) {
bool validPidx = dim == 0 || pidxs[tid][dim - 1];
if (!validPidx) {
        // We might not find the pidx for the sparse output tensor, since it
        // is unconditionally required by sparsification.
assert(isOutputTensor(tid));
continue;
}
pidxs[tid][dim] = genAddress(builder, loc, tid, dim, loopStack.back().iv);
      // NOTE: we could also prepare for the next dim here in advance.
}
}
}
SmallVector<Value, 2>
SparseTensorLoopEmitter::exitForLoop(OpBuilder &builder, Location loc,
ArrayRef<Value> reduc) {
LoopLevelInfo &loopInfo = loopStack.back();
auto &dims = loopStack.back().dims;
auto &tids = loopStack.back().tids;
auto forOp = llvm::cast<scf::ForOp>(loopInfo.loop);
if (!reduc.empty()) {
assert(reduc.size() == forOp.getNumResults());
builder.setInsertionPointToEnd(forOp.getBody());
builder.create<scf::YieldOp>(loc, reduc);
}
  // Finished iterating a tensor; clean up.
  // We only do the clean-up on for-loops, since while-loops do not
  // necessarily finish iterating over a sparse tensor.
for (auto [tid, dim] : llvm::zip(tids, dims)) {
// Reset to null.
coord[tid][dim] = Value();
pidxs[tid][dim] = Value();
    // For a dense dimension, the high is fixed; only reset sparse highs.
if (!isDenseDLT(dimTypes[tid][dim]))
highs[tid][dim] = Value();
}
// exit the loop
builder.setInsertionPointAfter(forOp);
return forOp.getResults();
}
SmallVector<Value, 2>
SparseTensorLoopEmitter::exitCoiterationLoop(OpBuilder &builder, Location loc,
ArrayRef<Value> reduc) {
auto whileOp = llvm::cast<scf::WhileOp>(loopStack.back().loop);
auto &dims = loopStack.back().dims;
auto &tids = loopStack.back().tids;
Value iv = loopStack.back().iv;
  // Generate the while-loop induction at the end.
builder.setInsertionPointToEnd(&whileOp.getAfter().front());
// Finalize the induction. Note that the induction could be performed
// in the individual if-branches to avoid re-evaluating the conditions.
// However, that would result in a rather elaborate forest of yield
// instructions during code generation. Moreover, performing the induction
// after the if-statements more closely resembles code generated by TACO.
unsigned o = 0;
SmallVector<Value, 4> operands;
Value one = constantIndex(builder, loc, 1);
for (auto [tid, dim] : llvm::zip(tids, dims)) {
if (isCompressedDLT(dimTypes[tid][dim]) ||
isSingletonDLT(dimTypes[tid][dim])) {
Value op1 = coord[tid][dim];
Value op3 = pidxs[tid][dim];
Value cmp =
builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq, op1, iv);
Value add = builder.create<arith::AddIOp>(loc, op3, one);
operands.push_back(builder.create<arith::SelectOp>(loc, cmp, add, op3));
      // Subsequent loops continue iterating from the break point of the
      // current while-loop.
pidxs[tid][dim] = whileOp->getResult(o++);
// The coordinates are invalid now.
coord[tid][dim] = nullptr;
// highs remains unchanged.
}
}
// Reduction value from users.
SmallVector<Value, 2> ret;
for (auto red : reduc) {
operands.push_back(red);
ret.push_back(whileOp->getResult(o++));
}
// An (optional) universal index.
if (operands.size() < whileOp.getNumResults()) {
assert(operands.size() + 1 == whileOp.getNumResults());
    // The last one is the universal index.
operands.push_back(builder.create<arith::AddIOp>(loc, iv, one));
    // Update the loop starting point of the current loop sequence.
loopSeqStack.back() = whileOp->getResult(o++);
}
assert(o == operands.size());
builder.create<scf::YieldOp>(loc, operands);
builder.setInsertionPointAfter(whileOp);
return ret;
}
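// For illustration, the induction finalization for each sparse tensor looks
// roughly like (a sketch with hypothetical names):
//   %cmp = arith.cmpi eq, %coord, %iv : index
//   %add = arith.addi %pidx, %c1 : index
//   %new = arith.select %cmp, %add, %pidx : index
// i.e., only tensors whose coordinate matched the minimum advance their pidx.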
SmallVector<Value, 2>
SparseTensorLoopEmitter::exitCurrentLoop(OpBuilder &builder, Location loc,
ArrayRef<Value> reduc) {
  // Clean up the values; this helps us discover potential bugs at an
  // earlier stage (instead of silently using a wrong value).
LoopLevelInfo &loopInfo = loopStack.back();
assert(loopInfo.tids.size() == loopInfo.dims.size());
SmallVector<Value, 2> red;
if (llvm::isa<scf::WhileOp>(loopInfo.loop)) {
red = exitCoiterationLoop(builder, loc, reduc);
} else {
red = exitForLoop(builder, loc, reduc);
}
assert(loopStack.size() == loopSeqStack.size());
loopStack.pop_back();
return red;
}
//===----------------------------------------------------------------------===//
// ExecutionEngine/SparseTensorUtils helper functions.
//===----------------------------------------------------------------------===//
OverheadType mlir::sparse_tensor::overheadTypeEncoding(unsigned width) {
switch (width) {
case 64:
return OverheadType::kU64;
case 32:
return OverheadType::kU32;
case 16:
return OverheadType::kU16;
case 8:
return OverheadType::kU8;
case 0:
return OverheadType::kIndex;
}
llvm_unreachable("Unsupported overhead bitwidth");
}
OverheadType mlir::sparse_tensor::overheadTypeEncoding(Type tp) {
if (tp.isIndex())
return OverheadType::kIndex;
if (auto intTp = tp.dyn_cast<IntegerType>())
return overheadTypeEncoding(intTp.getWidth());
llvm_unreachable("Unknown overhead type");
}
Type mlir::sparse_tensor::getOverheadType(Builder &builder, OverheadType ot) {
switch (ot) {
case OverheadType::kIndex:
return builder.getIndexType();
case OverheadType::kU64:
return builder.getIntegerType(64);
case OverheadType::kU32:
return builder.getIntegerType(32);
case OverheadType::kU16:
return builder.getIntegerType(16);
case OverheadType::kU8:
return builder.getIntegerType(8);
}
llvm_unreachable("Unknown OverheadType");
}
OverheadType mlir::sparse_tensor::pointerOverheadTypeEncoding(
const SparseTensorEncodingAttr &enc) {
return overheadTypeEncoding(enc.getPointerBitWidth());
}
OverheadType mlir::sparse_tensor::indexOverheadTypeEncoding(
const SparseTensorEncodingAttr &enc) {
return overheadTypeEncoding(enc.getIndexBitWidth());
}
Type mlir::sparse_tensor::getPointerOverheadType(
Builder &builder, const SparseTensorEncodingAttr &enc) {
return getOverheadType(builder, pointerOverheadTypeEncoding(enc));
}
Type mlir::sparse_tensor::getIndexOverheadType(
Builder &builder, const SparseTensorEncodingAttr &enc) {
return getOverheadType(builder, indexOverheadTypeEncoding(enc));
}
// TODO: Adjust the naming convention for the constructors of
// `OverheadType` so we can use the `MLIR_SPARSETENSOR_FOREVERY_O` x-macro
// here instead of `MLIR_SPARSETENSOR_FOREVERY_FIXED_O`; to further reduce
// the possibility of typo bugs or things getting out of sync.
StringRef mlir::sparse_tensor::overheadTypeFunctionSuffix(OverheadType ot) {
switch (ot) {
case OverheadType::kIndex:
return "0";
#define CASE(ONAME, O) \
case OverheadType::kU##ONAME: \
return #ONAME;
MLIR_SPARSETENSOR_FOREVERY_FIXED_O(CASE)
#undef CASE
}
llvm_unreachable("Unknown OverheadType");
}
StringRef mlir::sparse_tensor::overheadTypeFunctionSuffix(Type tp) {
return overheadTypeFunctionSuffix(overheadTypeEncoding(tp));
}
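// For illustration, these suffixes compose the names of runtime support
// functions; e.g., OverheadType::kU32 yields "32" and OverheadType::kIndex
// yields "0", so a pointers accessor would be named along the lines of
// "sparsePointers0" (an assumed example of such a composed name).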
PrimaryType mlir::sparse_tensor::primaryTypeEncoding(Type elemTp) {
if (elemTp.isF64())
return PrimaryType::kF64;
if (elemTp.isF32())
return PrimaryType::kF32;
if (elemTp.isF16())
return PrimaryType::kF16;
if (elemTp.isBF16())
return PrimaryType::kBF16;
if (elemTp.isInteger(64))
return PrimaryType::kI64;
if (elemTp.isInteger(32))
return PrimaryType::kI32;
if (elemTp.isInteger(16))
return PrimaryType::kI16;
if (elemTp.isInteger(8))
return PrimaryType::kI8;
if (auto complexTp = elemTp.dyn_cast<ComplexType>()) {
auto complexEltTp = complexTp.getElementType();
if (complexEltTp.isF64())
return PrimaryType::kC64;
if (complexEltTp.isF32())
return PrimaryType::kC32;
}
llvm_unreachable("Unknown primary type");
}
StringRef mlir::sparse_tensor::primaryTypeFunctionSuffix(PrimaryType pt) {
switch (pt) {
#define CASE(VNAME, V) \
case PrimaryType::k##VNAME: \
return #VNAME;
MLIR_SPARSETENSOR_FOREVERY_V(CASE)
#undef CASE
}
llvm_unreachable("Unknown PrimaryType");
}
StringRef mlir::sparse_tensor::primaryTypeFunctionSuffix(Type elemTp) {
return primaryTypeFunctionSuffix(primaryTypeEncoding(elemTp));
}
//===----------------------------------------------------------------------===//
// Misc code generators.
//===----------------------------------------------------------------------===//
mlir::Attribute mlir::sparse_tensor::getOneAttr(Builder &builder, Type tp) {
if (tp.isa<FloatType>())
return builder.getFloatAttr(tp, 1.0);
if (tp.isa<IndexType>())
return builder.getIndexAttr(1);
if (auto intTp = tp.dyn_cast<IntegerType>())
return builder.getIntegerAttr(tp, APInt(intTp.getWidth(), 1));
if (tp.isa<RankedTensorType, VectorType>()) {
auto shapedTp = tp.cast<ShapedType>();
if (auto one = getOneAttr(builder, shapedTp.getElementType()))
return DenseElementsAttr::get(shapedTp, one);
}
llvm_unreachable("Unsupported attribute type");
}
Value mlir::sparse_tensor::genIsNonzero(OpBuilder &builder, mlir::Location loc,
Value v) {
Type tp = v.getType();
Value zero = constantZero(builder, loc, tp);
if (tp.isa<FloatType>())
return builder.create<arith::CmpFOp>(loc, arith::CmpFPredicate::UNE, v,
zero);
if (tp.isIntOrIndex())
return builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::ne, v,
zero);
if (tp.dyn_cast<ComplexType>())
return builder.create<complex::NotEqualOp>(loc, v, zero);
llvm_unreachable("Non-numeric type");
}
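// For illustration, for an f64 value this emits roughly (a sketch):
//   %zero = arith.constant 0.000000e+00 : f64
//   %cond = arith.cmpf une, %v, %zero : f64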
void mlir::sparse_tensor::genReshapeDstShape(
Location loc, PatternRewriter &rewriter, SmallVector<Value, 4> &dstShape,
ArrayRef<Value> srcShape, ArrayRef<int64_t> staticDstShape,
ArrayRef<ReassociationIndices> reassociation) {
// Collapse shape.
if (reassociation.size() < srcShape.size()) {
unsigned start = 0;
for (const auto &map : llvm::enumerate(reassociation)) {
auto dstDim = constantIndex(rewriter, loc, 1);
for (unsigned i = start; i < start + map.value().size(); i++) {
dstDim = rewriter.create<arith::MulIOp>(loc, dstDim, srcShape[i]);
}
dstShape.push_back(dstDim);
start = start + map.value().size();
}
assert(start == srcShape.size());
return;
}
// Expand shape.
assert(reassociation.size() == srcShape.size());
unsigned start = 0;
// Expand the i-th dimension in srcShape.
for (unsigned i = 0, size = srcShape.size(); i < size; i++) {
const auto &map = reassociation[i];
auto srcDim = srcShape[i];
// Iterate through dimensions expanded from the i-th dimension.
for (unsigned j = start; j < start + map.size(); j++) {
// There can be only one dynamic sized dimension among dimensions
// expanded from the i-th dimension in srcShape.
// For example, if srcDim = 8, then the expanded shape could be <2x?x2>,
// but not <2x?x?>.
if (staticDstShape[j] == ShapedType::kDynamicSize) {
// The expanded dimension has dynamic size. We compute the dimension
// by dividing srcDim by the product of the static dimensions.
int64_t product = 1;
for (unsigned k = start; k < start + map.size(); k++) {
if (staticDstShape[k] != ShapedType::kDynamicSize) {
product *= staticDstShape[k];
}
}
// Compute the dynamic dimension size.
Value productVal = constantIndex(rewriter, loc, product);
Value dynamicSize =
rewriter.create<arith::DivUIOp>(loc, srcDim, productVal);
dstShape.push_back(dynamicSize);
} else {
// The expanded dimension is statically known.
dstShape.push_back(constantIndex(rewriter, loc, staticDstShape[j]));
}
}
start = start + map.size();
}
assert(start == staticDstShape.size());
}
void mlir::sparse_tensor::translateIndicesArray(
OpBuilder &builder, Location loc,
ArrayRef<ReassociationIndices> reassociation, ValueRange srcIndices,
ArrayRef<Value> srcShape, ArrayRef<Value> dstShape,
SmallVectorImpl<Value> &dstIndices) {
unsigned i = 0;
unsigned start = 0;
unsigned dstRank = dstShape.size();
unsigned srcRank = srcShape.size();
assert(srcRank == srcIndices.size());
bool isCollapse = srcRank > dstRank;
ArrayRef<Value> shape = isCollapse ? srcShape : dstShape;
// Iterate over reassociation map.
for (const auto &map : llvm::enumerate(reassociation)) {
    // Prepare stride information for the dimension slice.
Value linear = constantIndex(builder, loc, 1);
for (unsigned j = start, end = start + map.value().size(); j < end; j++) {
linear = builder.create<arith::MulIOp>(loc, linear, shape[j]);
}
// Start expansion.
Value val;
if (!isCollapse)
val = srcIndices[i];
// Iterate over dimension slice.
for (unsigned j = start, end = start + map.value().size(); j < end; j++) {
linear = builder.create<arith::DivUIOp>(loc, linear, shape[j]);
if (isCollapse) {
Value old = srcIndices[j];
Value mul = builder.create<arith::MulIOp>(loc, old, linear);
val = val ? builder.create<arith::AddIOp>(loc, val, mul) : mul;
} else {
Value old = val;
val = builder.create<arith::DivUIOp>(loc, val, linear);
assert(dstIndices.size() == j);
dstIndices.push_back(val);
val = builder.create<arith::RemUIOp>(loc, old, linear);
}
}
// Finalize collapse.
if (isCollapse) {
assert(dstIndices.size() == i);
dstIndices.push_back(val);
}
start += map.value().size();
i++;
}
assert(dstIndices.size() == dstRank);
}
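// For illustration, collapsing a 2-d shape with reassociation [[0, 1]] and
// srcShape [%d0, %d1] linearizes indices (%i, %j) into %i * %d1 + %j,
// while the expansion direction recovers them with the inverse div/rem
// decomposition (a sketch under assumed shapes).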
FlatSymbolRefAttr mlir::sparse_tensor::getFunc(ModuleOp module, StringRef name,
TypeRange resultType,
ValueRange operands,
EmitCInterface emitCInterface) {
MLIRContext *context = module.getContext();
auto result = SymbolRefAttr::get(context, name);
auto func = module.lookupSymbol<func::FuncOp>(result.getAttr());
if (!func) {
OpBuilder moduleBuilder(module.getBodyRegion());
func = moduleBuilder.create<func::FuncOp>(
module.getLoc(), name,
FunctionType::get(context, operands.getTypes(), resultType));
func.setPrivate();
if (static_cast<bool>(emitCInterface))
func->setAttr(LLVM::LLVMDialect::getEmitCWrapperAttrName(),
UnitAttr::get(context));
}
return result;
}
func::CallOp mlir::sparse_tensor::createFuncCall(
OpBuilder &builder, Location loc, StringRef name, TypeRange resultType,
ValueRange operands, EmitCInterface emitCInterface) {
auto module = builder.getBlock()->getParentOp()->getParentOfType<ModuleOp>();
FlatSymbolRefAttr fn =
getFunc(module, name, resultType, operands, emitCInterface);
return builder.create<func::CallOp>(loc, resultType, fn, operands);
}
Type mlir::sparse_tensor::getOpaquePointerType(OpBuilder &builder) {
return LLVM::LLVMPointerType::get(builder.getI8Type());
}
Value mlir::sparse_tensor::genAlloca(OpBuilder &builder, Location loc,
unsigned sz, Type tp) {
return genAlloca(builder, loc, constantIndex(builder, loc, sz), tp);
}
Value mlir::sparse_tensor::genAlloca(OpBuilder &builder, Location loc, Value sz,
Type tp) {
auto memTp = MemRefType::get({ShapedType::kDynamicSize}, tp);
return builder.create<memref::AllocaOp>(loc, memTp, ValueRange{sz});
}
Value mlir::sparse_tensor::genAllocaScalar(OpBuilder &builder, Location loc,
Type tp) {
return builder.create<memref::AllocaOp>(loc, MemRefType::get({}, tp));
}
Value mlir::sparse_tensor::allocDenseTensor(OpBuilder &builder, Location loc,
RankedTensorType tensorTp,
ValueRange sizes) {
Type elemTp = tensorTp.getElementType();
auto shape = tensorTp.getShape();
auto memTp = MemRefType::get(shape, elemTp);
SmallVector<Value> dynamicSizes;
for (unsigned i = 0, rank = tensorTp.getRank(); i < rank; i++) {
if (shape[i] == ShapedType::kDynamicSize)
dynamicSizes.push_back(sizes[i]);
}
Value mem = builder.create<memref::AllocOp>(loc, memTp, dynamicSizes);
Value zero = constantZero(builder, loc, elemTp);
builder.create<linalg::FillOp>(loc, ValueRange{zero}, ValueRange{mem});
return mem;
}
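// For illustration, for tensor<?x4xf64> with dynamic first size %d this
// emits roughly (a sketch):
//   %mem = memref.alloc(%d) : memref<?x4xf64>
//   %zero = arith.constant 0.000000e+00 : f64
//   linalg.fill ins(%zero : f64) outs(%mem : memref<?x4xf64>)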
Value mlir::sparse_tensor::genValueForDense(OpBuilder &builder, Location loc,
Value tensor, ValueRange ivs) {
Value val = builder.create<tensor::ExtractOp>(loc, tensor, ivs);
Value cond = genIsNonzero(builder, loc, val);
scf::IfOp ifOp = builder.create<scf::IfOp>(loc, cond, /*else*/ false);
builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
return val;
}
void mlir::sparse_tensor::genDenseTensorOrSparseConstantIterLoop(
OpBuilder &builder, Location loc, Value src, unsigned rank,
function_ref<void(OpBuilder &, Location, Value, ValueRange)> bodyBuilder) {
SmallVector<Value, 4> indicesArray;
SmallVector<Value> lo;
SmallVector<Value> hi;
SmallVector<Value> st;
Value zero = constantIndex(builder, loc, 0);
Value one = constantIndex(builder, loc, 1);
auto indicesValues = genSplitSparseConstant(builder, loc, src);
bool isCOOConstant = indicesValues.has_value();
Value indices;
Value values;
if (isCOOConstant) {
indices = indicesValues->first;
values = indicesValues->second;
lo.push_back(zero);
hi.push_back(linalg::createOrFoldDimOp(builder, loc, values, 0));
st.push_back(one);
} else {
for (unsigned i = 0; i < rank; i++) {
lo.push_back(zero);
hi.push_back(linalg::createOrFoldDimOp(builder, loc, src, i));
st.push_back(one);
}
}
scf::buildLoopNest(
builder, loc, lo, hi, st, {},
[&](OpBuilder &builder, Location loc, ValueRange ivs,
ValueRange args) -> scf::ValueVector {
Value val;
if (isCOOConstant)
val = genIndexAndValueForSparse(builder, loc, indices, values,
indicesArray, ivs, rank);
else
val = genIndexAndValueForDense(builder, loc, src, indicesArray, ivs);
bodyBuilder(builder, loc, val, indicesArray);
return {};
});
}
void mlir::sparse_tensor::sizesFromSrc(OpBuilder &builder,
SmallVector<Value, 4> &sizes,
Location loc, Value src) {
unsigned rank = src.getType().cast<ShapedType>().getRank();
for (unsigned i = 0; i < rank; i++)
sizes.push_back(linalg::createOrFoldDimOp(builder, loc, src, i));
}
Operation *mlir::sparse_tensor::getTop(Operation *op) {
for (; isa<scf::ForOp>(op->getParentOp()) ||
isa<scf::WhileOp>(op->getParentOp()) ||
isa<scf::ParallelOp>(op->getParentOp()) ||
isa<scf::IfOp>(op->getParentOp());
op = op->getParentOp())
;
return op;
}