//===- CodegenUtils.cpp - Utilities for generating MLIR -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "CodegenUtils.h"

#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/Types.h"
#include "mlir/IR/Value.h"

using namespace mlir;
using namespace mlir::sparse_tensor;

/// Generates a pointer/index load from the sparse storage scheme. Narrower
/// data types need to be zero extended before casting the value into the
/// index type used for looping and indexing.
static Value genIndexLoad(OpBuilder &builder, Location loc, Value ptr,
                          Value s) {
  // For the scalar case, we simply zero extend narrower indices into 64-bit
  // values before casting to index without a performance penalty. Here too,
  // however, indices that already are 64-bit, in theory, cannot express the
  // full index range.
  Value load = builder.create<memref::LoadOp>(loc, ptr, s);
  if (!load.getType().isa<IndexType>()) {
    if (load.getType().getIntOrFloatBitWidth() < 64)
      load = builder.create<arith::ExtUIOp>(loc, builder.getI64Type(), load);
    load =
        builder.create<arith::IndexCastOp>(loc, builder.getIndexType(), load);
  }
  return load;
}

/// If the tensor is a sparse constant, generates and returns the pair of
/// constants for the indices and the values.
static Optional<std::pair<Value, Value>>
genSplitSparseConstant(OpBuilder &builder, Location loc, Value tensor) {
  if (auto constOp = tensor.getDefiningOp<arith::ConstantOp>()) {
    if (auto attr = constOp.getValue().dyn_cast<SparseElementsAttr>()) {
      DenseElementsAttr indicesAttr = attr.getIndices();
      Value indices = builder.create<arith::ConstantOp>(loc, indicesAttr);
      DenseElementsAttr valuesAttr = attr.getValues();
      Value values = builder.create<arith::ConstantOp>(loc, valuesAttr);
      return std::make_pair(indices, values);
    }
  }
  return {};
}

/// Generates code that copies the index at indices[ivs] into indicesArray and
/// returns the value at values[ivs].
static Value genIndexAndValueForSparse(OpBuilder &builder, Location loc,
                                       Value indices, Value values,
                                       SmallVectorImpl<Value> &indicesArray,
                                       ValueRange ivs, unsigned rank) {
  for (unsigned i = 0; i < rank; i++) {
    Value idx = constantIndex(builder, loc, i);
    Value val = builder.create<tensor::ExtractOp>(loc, indices,
                                                  ValueRange{ivs[0], idx});
    val = builder.create<arith::IndexCastOp>(loc, builder.getIndexType(), val);
    indicesArray.push_back(val);
  }
  return builder.create<tensor::ExtractOp>(loc, values, ivs[0]);
}

/// Generates the code to read the value from tensor[ivs], and conditionally
/// stores the indices ivs to the memory in ind. The generated code looks like
/// the following and the insertion point after this routine is inside the
/// if-then branch behind the assignment to ind. This is to ensure that the
/// code that uses the ind, such as an addEltX call generated after, is inside
/// the if-then branch.
/// if (tensor[ivs] != 0) /// ind = ivs static Value genIndexAndValueForDense(OpBuilder &builder, Location loc, Value tensor, SmallVectorImpl &indicesArray, ValueRange ivs) { Value val = genValueForDense(builder, loc, tensor, ivs); indicesArray.append(ivs.begin(), ivs.end()); return val; } void sparse_tensor::foreachFieldInSparseTensor( const SparseTensorEncodingAttr enc, llvm::function_ref callback) { assert(enc); #define RETURN_ON_FALSE(idx, kind, dim, dlt) \ if (!(callback(idx, kind, dim, dlt))) \ return; RETURN_ON_FALSE(dimSizesIdx, SparseTensorFieldKind::DimSizes, -1u, DimLevelType::Undef); RETURN_ON_FALSE(memSizesIdx, SparseTensorFieldKind::MemSizes, -1u, DimLevelType::Undef); static_assert(dataFieldIdx == memSizesIdx + 1); unsigned fieldIdx = dataFieldIdx; // Per-dimension storage. for (unsigned r = 0, rank = enc.getDimLevelType().size(); r < rank; r++) { // Dimension level types apply in order to the reordered dimension. // As a result, the compound type can be constructed directly in the given // order. auto dlt = getDimLevelType(enc, r); if (isCompressedDLT(dlt)) { RETURN_ON_FALSE(fieldIdx++, SparseTensorFieldKind::PtrMemRef, r, dlt); RETURN_ON_FALSE(fieldIdx++, SparseTensorFieldKind::IdxMemRef, r, dlt); } else if (isSingletonDLT(dlt)) { RETURN_ON_FALSE(fieldIdx++, SparseTensorFieldKind::IdxMemRef, r, dlt); } else { assert(isDenseDLT(dlt)); // no fields } } // The values array. RETURN_ON_FALSE(fieldIdx++, SparseTensorFieldKind::ValMemRef, -1u, DimLevelType::Undef); #undef RETURN_ON_FALSE } void sparse_tensor::foreachFieldAndTypeInSparseTensor( RankedTensorType rType, llvm::function_ref callback) { auto enc = getSparseTensorEncoding(rType); assert(enc); // Construct the basic types. Type indexType = IndexType::get(enc.getContext()); Type idxType = enc.getIndexType(); Type ptrType = enc.getPointerType(); Type eltType = rType.getElementType(); unsigned rank = rType.getShape().size(); // memref dimSizes Type dimSizeType = MemRefType::get({rank}, indexType); // memref memSizes Type memSizeType = MemRefType::get({getNumDataFieldsFromEncoding(enc)}, indexType); // memref pointers Type ptrMemType = MemRefType::get({ShapedType::kDynamic}, ptrType); // memref indices Type idxMemType = MemRefType::get({ShapedType::kDynamic}, idxType); // memref values Type valMemType = MemRefType::get({ShapedType::kDynamic}, eltType); foreachFieldInSparseTensor( enc, [dimSizeType, memSizeType, ptrMemType, idxMemType, valMemType, callback](unsigned fieldIdx, SparseTensorFieldKind fieldKind, unsigned dim, DimLevelType dlt) -> bool { switch (fieldKind) { case SparseTensorFieldKind::DimSizes: return callback(dimSizeType, fieldIdx, fieldKind, dim, dlt); case SparseTensorFieldKind::MemSizes: return callback(memSizeType, fieldIdx, fieldKind, dim, dlt); case SparseTensorFieldKind::PtrMemRef: return callback(ptrMemType, fieldIdx, fieldKind, dim, dlt); case SparseTensorFieldKind::IdxMemRef: return callback(idxMemType, fieldIdx, fieldKind, dim, dlt); case SparseTensorFieldKind::ValMemRef: return callback(valMemType, fieldIdx, fieldKind, dim, dlt); }; llvm_unreachable("unrecognized field kind"); }); } unsigned sparse_tensor::getNumFieldsFromEncoding(SparseTensorEncodingAttr enc) { unsigned numFields = 0; foreachFieldInSparseTensor(enc, [&numFields](unsigned, SparseTensorFieldKind, unsigned, DimLevelType) -> bool { numFields++; return true; }); return numFields; } unsigned sparse_tensor::getNumDataFieldsFromEncoding(SparseTensorEncodingAttr enc) { unsigned numFields = 0; // one value memref 
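  // Data fields are all fields with index >= dataFieldIdx, i.e., the
  // per-dimension pointer/index memrefs plus the values memref; the dimSizes
  // and memSizes memrefs are not counted.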
foreachFieldInSparseTensor(enc, [&numFields](unsigned fidx, SparseTensorFieldKind, unsigned, DimLevelType) -> bool { if (fidx >= dataFieldIdx) numFields++; return true; }); assert(numFields == getNumFieldsFromEncoding(enc) - dataFieldIdx); return numFields; } //===----------------------------------------------------------------------===// // Sparse tensor loop emitter class implementations //===----------------------------------------------------------------------===// SparseTensorLoopEmitter::SparseTensorLoopEmitter(ValueRange tensors, StringAttr loopTag, bool hasOutput, bool isSparseOut, ArrayRef topSort) : loopTag(loopTag), hasOutput(hasOutput), isSparseOut(isSparseOut), tensors(tensors.begin(), tensors.end()), dimTypes(tensors.size()), pidxs(tensors.size()), coord(tensors.size()), highs(tensors.size()), ptrBuffer(tensors.size()), idxBuffer(tensors.size()), valBuffer(tensors.size()), loopStack(), sparsiferLoopLvlMap(topSort.size(), 0) { for (size_t tid = 0, e = tensors.size(); tid < e; tid++) { auto t = tensors[tid]; // a scalar or 0-dimension tensors if (isZeroRankedTensorOrScalar(t.getType())) continue; auto rtp = t.getType().cast(); auto rank = static_cast(rtp.getRank()); auto enc = getSparseTensorEncoding(rtp); // We always treat sparse output tensor as dense so that we always iterate // it based on dim size. if (enc && !(isOutputTensor(tid) && isSparseOut)) for (auto dimTp : enc.getDimLevelType()) dimTypes[tid].push_back(dimTp); else dimTypes[tid].assign(rank, DimLevelType::Dense); // Initialize using empty value. pidxs[tid].assign(rank, Value()); coord[tid].assign(rank, Value()); highs[tid].assign(rank, Value()); ptrBuffer[tid].assign(rank, Value()); idxBuffer[tid].assign(rank, Value()); } for (unsigned i = 0, e = topSort.size(); i < e; i++) { // This is an inverse map of the topologically sorted loop index from // sparsifier. This is needed to map the AffineDimExpr back to the loopStack // index used in loop emitter. sparsiferLoopLvlMap[topSort[i]] = i; } } void SparseTensorLoopEmitter::initializeLoopEmit( OpBuilder &builder, Location loc, SparseTensorLoopEmitter::OutputUpdater updater) { // For every tensor, find lower and upper bound on dimensions, set the // same bounds on loop indices, and obtain dense or sparse buffer(s). for (size_t t = 0, e = tensors.size(); t < e; t++) { auto tensor = tensors[t]; auto rtp = tensor.getType().dyn_cast(); if (!rtp) // Skips only scalar, zero ranked tensor still need to be bufferized and // (probably) filled with zeros by users. continue; auto rank = rtp.getRank(); auto shape = rtp.getShape(); auto enc = getSparseTensorEncoding(rtp); auto dynShape = {ShapedType::kDynamic}; // Scan all dimensions of current tensor. for (int64_t d = 0; d < rank; d++) { // This should be called only once at beginning. assert(!ptrBuffer[t][d] && !idxBuffer[t][d] && !highs[t][d]); // Handle sparse storage schemes. if (isCompressedDLT(dimTypes[t][d])) { auto ptrTp = MemRefType::get(dynShape, getPointerOverheadType(builder, enc)); auto indTp = MemRefType::get(dynShape, getIndexOverheadType(builder, enc)); auto dim = builder.getIndexAttr(d); // Generate sparse primitives to obtains pointer and indices. ptrBuffer[t][d] = builder.create(loc, ptrTp, tensor, dim); idxBuffer[t][d] = builder.create(loc, indTp, tensor, dim); } else if (isSingletonDLT(dimTypes[t][d])) { // Singleton dimension, fetch indices. 
auto indTp = MemRefType::get(dynShape, getIndexOverheadType(builder, enc)); auto dim = builder.getIndexAttr(d); idxBuffer[t][d] = builder.create(loc, indTp, tensor, dim); } else { // Dense dimension, nothing to fetch. assert(isDenseDLT(dimTypes[t][d])); } // Find upper bound in current dimension. unsigned p = toOrigDim(enc, d); Value up = mlir::linalg::createOrFoldDimOp(builder, loc, tensor, p); highs[t][d] = up; } // Perform the required bufferization. Dense inputs materialize // from the input tensors. Sparse inputs use sparse primitives to obtain the // values. // Delegates extra output initialization to clients. bool isOutput = isOutputTensor(t); Type elementType = rtp.getElementType(); if (!enc) { // Non-annotated dense tensors. auto denseTp = MemRefType::get(shape, elementType); Value denseVal = builder.create(loc, denseTp, tensor); // Dense outputs need special handling. if (isOutput && updater) denseVal = updater(builder, loc, denseVal, tensor); valBuffer[t] = denseVal; } else { // Annotated sparse tensors. // We also need the value buffer for annotated all dense `sparse` tensor. auto dynShape = {ShapedType::kDynamic}; auto sparseTp = MemRefType::get(dynShape, elementType); valBuffer[t] = builder.create(loc, sparseTp, tensor); } // NOTE: we can also prepares for 0 dim here in advance, this will hosit // some loop preparation from tensor iteration, but will also (undesirably) // hosit the code ouside if conditions. } } void SparseTensorLoopEmitter::enterNewLoopSeq(OpBuilder &builder, Location loc, ArrayRef tids, ArrayRef dims) { // Universal Index start from 0 assert(loopSeqStack.size() == loopStack.size()); // Universal index starts from 0 loopSeqStack.emplace_back(constantIndex(builder, loc, 0)); // Prepares for all the tensors used in the current loop sequence. for (auto [tid, dim] : llvm::zip(tids, dims)) prepareLoopOverTensorAtDim(builder, loc, tid, dim); } Value SparseTensorLoopEmitter::genAffine(OpBuilder &builder, AffineExpr a, Location loc) { switch (a.getKind()) { case AffineExprKind::DimId: { unsigned idx = a.cast().getPosition(); return loopStack[sparsiferLoopLvlMap[idx]].iv; } case AffineExprKind::Add: { auto binOp = a.cast(); return builder.create( loc, genAffine(builder, binOp.getLHS(), loc), genAffine(builder, binOp.getRHS(), loc)); } case AffineExprKind::Mul: { auto binOp = a.cast(); return builder.create( loc, genAffine(builder, binOp.getLHS(), loc), genAffine(builder, binOp.getRHS(), loc)); } case AffineExprKind::Constant: { int64_t c = a.cast().getValue(); return constantIndex(builder, loc, c); } default: llvm_unreachable("unexpected affine subscript"); } } Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim( OpBuilder &builder, Location loc, size_t tid, size_t dim, MutableArrayRef reduc, bool isParallel, ArrayRef extraTids, ArrayRef extraDims) { assert(dimTypes[tid].size() > dim); // We can not re-enter the same level. assert(!coord[tid][dim]); // TODO: support multiple return on parallel for? assert(!isParallel || reduc.size() <= 1); Value step = constantIndex(builder, loc, 1); auto dimType = dimTypes[tid][dim]; bool isSparseInput = isCompressedDLT(dimType) || isSingletonDLT(dimType); assert(isDenseDLT(dimType) || isCompressedDLT(dimType) || isSingletonDLT(dimType)); Value lo = isSparseInput ? 
pidxs[tid][dim] // current offset : loopSeqStack.back(); // univeral tid Value hi = highs[tid][dim]; Operation *loop = nullptr; Value iv; if (isParallel) { scf::ParallelOp parOp = builder.create(loc, lo, hi, step, reduc); builder.setInsertionPointToStart(parOp.getBody()); assert(parOp.getNumReductions() == reduc.size()); iv = parOp.getInductionVars()[0]; // In-place update on the reduction variable vector. // Note that the init vals is not the actual reduction variables but instead // used as a `special handle` to (temporarily) represent them. The // expression on init vals will be moved into scf.reduce and replaced with // the block arguments when exiting the loop (see exitForLoop). This is // needed as we can not build the actual reduction block and get the actual // reduction varaible before users fill parallel loop body. for (int i = 0, e = reduc.size(); i < e; i++) reduc[i] = parOp.getInitVals()[i]; loop = parOp; } else { scf::ForOp forOp = builder.create(loc, lo, hi, step, reduc); builder.setInsertionPointToStart(forOp.getBody()); iv = forOp.getInductionVar(); // In-place update on the reduction variable vector. assert(forOp.getNumRegionIterArgs() == reduc.size()); for (int i = 0, e = reduc.size(); i < e; i++) reduc[i] = forOp.getRegionIterArg(i); loop = forOp; } assert(loop && iv); if (isSparseInput) { pidxs[tid][dim] = iv; // Generating a load on the indices array yields the coordinate. Value ptr = idxBuffer[tid][dim]; coord[tid][dim] = genIndexLoad(builder, loc, ptr, iv); } else { // Dense tensor, the coordinates is the inducation variable. coord[tid][dim] = iv; // generate pidx for dense dim (pidx = i * sz + j) auto enc = getSparseTensorEncoding(tensors[tid].getType()); if (enc && !isSparseOutput(tid)) pidxs[tid][dim] = genAddress(builder, loc, tid, dim, iv); } // NOTE: we can also prepares for next dim here in advance // Push the loop into stack loopStack.emplace_back(ArrayRef(tid), ArrayRef(dim), loop, coord[tid][dim], loopTag); // Emit extra locals. emitExtraLocalsForTensorsAtDenseDims(builder, loc, extraTids, extraDims); return loop; } Operation *SparseTensorLoopEmitter::enterFilterLoopOverTensorAtDim( OpBuilder &builder, Location loc, size_t tid, size_t dim, AffineExpr affine, MutableArrayRef reduc) { assert(!affine.isa() && !isDenseDLT(dimTypes[tid][dim])); assert(dimTypes[tid].size() > dim); // We can not re-enter the same level. assert(!coord[tid][dim]); Value step = constantIndex(builder, loc, 1); Value lo = pidxs[tid][dim]; Value hi = highs[tid][dim]; // TODO: We should instead use a whileOp for filter loop to allow early // break when exceeding (for ordered dimensions). // TODO: There are many other potiential opportunities that we might apply in // the future. E.g., we could use binary search to located the pointer index. scf::ForOp forOp = builder.create(loc, lo, hi, step, reduc); // In-place update on the reduction variable vector. assert(forOp.getNumRegionIterArgs() == reduc.size()); for (int i = 0, e = reduc.size(); i < e; i++) reduc[i] = forOp.getRegionIterArg(i); builder.setInsertionPointToStart(forOp.getBody()); Value iv = forOp.getInductionVar(); pidxs[tid][dim] = iv; // Generating a load on the indices array yields the coordinate. Value ptr = idxBuffer[tid][dim]; coord[tid][dim] = genIndexLoad(builder, loc, ptr, iv); // Generate an if condition to filter out indices that is not equal to the // result of the affine expression. 
Value expected = genAffine(builder, affine, loc); auto pred = builder.create(loc, arith::CmpIPredicate::eq, coord[tid][dim], expected); SmallVector types; for (Value red : reduc) { types.push_back(red.getType()); } bool hasReduc = !types.empty(); scf::IfOp ifOp = builder.create(loc, types, pred, /*else*/ hasReduc); if (hasReduc) { // scf.for (a) -> v // %s = scf.if (a) -> v // user-generated code. // else // yield a // yield %s builder.create(loc, ifOp.getResults()); builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); // On mismatch. builder.create(loc, reduc); } // Set the insert point to matched branch. builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); // NOTE: we can also prepares for next dim here in advance // Push the loop into stack loopStack.emplace_back(ArrayRef(tid), ArrayRef(dim), forOp, coord[tid][dim], nullptr); return forOp; } void SparseTensorLoopEmitter::genDenseAffineAddressAtCurLevel( OpBuilder &builder, Location loc, size_t tid, size_t dim, AffineExpr affine) { Value affineV = genAffine(builder, affine, loc); pidxs[tid][dim] = genAddress(builder, loc, tid, dim, affineV); } Operation *SparseTensorLoopEmitter::enterCoIterationOverTensorsAtDims( OpBuilder &builder, Location loc, ArrayRef tids, ArrayRef dims, bool needsUniv, MutableArrayRef reduc, ArrayRef extraTids, ArrayRef extraDims) { assert(tids.size() == dims.size()); SmallVector types; SmallVector operands; // Construct the while-loop with a parameter for each index. Type indexType = builder.getIndexType(); for (auto [tid, dim] : llvm::zip(tids, dims)) { if (isCompressedDLT(dimTypes[tid][dim]) || isSingletonDLT(dimTypes[tid][dim])) { assert(pidxs[tid][dim]); types.push_back(indexType); operands.push_back(pidxs[tid][dim]); } } // The position where user-supplied reduction variable starts. for (Value rec : reduc) { types.push_back(rec.getType()); operands.push_back(rec); } if (needsUniv) { types.push_back(indexType); // Update universal index. operands.push_back(loopSeqStack.back()); } assert(types.size() == operands.size()); scf::WhileOp whileOp = builder.create(loc, types, operands); SmallVector locs(types.size(), loc); Block *before = builder.createBlock(&whileOp.getBefore(), {}, types, locs); Block *after = builder.createBlock(&whileOp.getAfter(), {}, types, locs); // Build the "before" region, which effectively consists // of a conjunction of "i < upper" tests on all induction. builder.setInsertionPointToStart(&whileOp.getBefore().front()); Value cond; unsigned o = 0; for (auto [tid, dim] : llvm::zip(tids, dims)) { if (isCompressedDLT(dimTypes[tid][dim]) || isSingletonDLT(dimTypes[tid][dim])) { Value op1 = before->getArgument(o); Value op2 = highs[tid][dim]; Value opc = builder.create(loc, arith::CmpIPredicate::ult, op1, op2); cond = cond ? builder.create(loc, cond, opc) : opc; // Update pidxs[tid][dim] = after->getArgument(o++); } } builder.create(loc, cond, before->getArguments()); // Generates while body. builder.setInsertionPointToStart(&whileOp.getAfter().front()); Value min; for (auto [tid, dim] : llvm::zip(tids, dims)) { // Prepares for next level. 
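    // The co-iteration coordinate is the minimum of the coordinates loaded
    // below over all sparse tensors, unless a universal index is requested,
    // in which case the universal index drives the iteration.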
if (isCompressedDLT(dimTypes[tid][dim]) || isSingletonDLT(dimTypes[tid][dim])) { Value ptr = idxBuffer[tid][dim]; Value s = pidxs[tid][dim]; Value load = genIndexLoad(builder, loc, ptr, s); coord[tid][dim] = load; if (!needsUniv) { if (min) { Value cmp = builder.create( loc, arith::CmpIPredicate::ult, load, min); min = builder.create(loc, cmp, load, min); } else { min = load; } } } } if (needsUniv) { assert(!min); // Otherwise, universal index is the minimal pidx. min = after->getArguments().back(); } for (auto [tid, dim] : llvm::zip(tids, dims)) { // All dense dim (as well as sparse output tensor) shared the same pidx in // the while loop. if (isDenseDLT(dimTypes[tid][dim])) { pidxs[tid][dim] = min; // generate pidx for dense dim (pidx = i * sz + j) auto enc = getSparseTensorEncoding(tensors[tid].getType()); if (enc && !isSparseOutput(tid)) pidxs[tid][dim] = genAddress(builder, loc, tid, dim, min); } // NOTE: we can also prepares for next dim here in advance } // Sets up the loop stack. loopStack.emplace_back(tids, dims, whileOp, min, loopTag); assert(loopStack.size() == loopSeqStack.size()); // Emits extra locals emitExtraLocalsForTensorsAtDenseDims(builder, loc, extraTids, extraDims); // Updates reduction variables assert(after->getNumArguments() == o + reduc.size() + (needsUniv ? 1 : 0)); // In-place update on reduction variable. for (unsigned i = 0, e = reduc.size(); i < e; i++) reduc[i] = after->getArgument(o + i); return whileOp; } void SparseTensorLoopEmitter::prepareLoopOverTensorAtDim(OpBuilder &builder, Location loc, size_t tid, size_t dim) { assert(dimTypes[tid].size() > dim); auto dimType = dimTypes[tid][dim]; if (isDenseDLT(dimType)) return; // Either the first dimension, or the previous dimension has been set. assert(dim == 0 || pidxs[tid][dim - 1]); Value c0 = constantIndex(builder, loc, 0); Value c1 = constantIndex(builder, loc, 1); if (isCompressedDLT(dimType)) { Value ptr = ptrBuffer[tid][dim]; Value pLo = dim == 0 ? c0 : pidxs[tid][dim - 1]; pidxs[tid][dim] = genIndexLoad(builder, loc, ptr, pLo); Value pHi = builder.create(loc, pLo, c1); highs[tid][dim] = genIndexLoad(builder, loc, ptr, pHi); return; } if (isSingletonDLT(dimType)) { Value pLo = dim == 0 ? c0 : pidxs[tid][dim - 1]; Value pHi = builder.create(loc, pLo, c1); pidxs[tid][dim] = pLo; highs[tid][dim] = pHi; return; } llvm_unreachable("Unrecognizable dimesion type!"); } void SparseTensorLoopEmitter::emitExtraLocalsForTensorsAtDenseDims( OpBuilder &builder, Location loc, ArrayRef tids, ArrayRef dims) { // Initialize dense positions. Note that we generate dense indices of the // output tensor unconditionally, since they may not appear in the lattice, // but may be needed for linearized codegen. for (auto [tid, dim] : llvm::zip(tids, dims)) { assert(isDenseDLT(dimTypes[tid][dim])); auto enc = getSparseTensorEncoding(tensors[tid].getType()); if (enc && !isSparseOutput(tid)) { bool validPidx = dim == 0 || pidxs[tid][dim - 1]; if (!validPidx) { // We might not find the pidx for the sparse output tensor as it is // unconditionally required by the sparsification. 
assert(isOutputTensor(tid)); continue; } pidxs[tid][dim] = genAddress(builder, loc, tid, dim, loopStack.back().iv); // NOTE: we can also prepares for next dim here in advance } } } void SparseTensorLoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc, MutableArrayRef reduc) { LoopLevelInfo &loopInfo = loopStack.back(); auto &dims = loopStack.back().dims; auto &tids = loopStack.back().tids; auto forOp = llvm::dyn_cast(loopInfo.loop); if (forOp) { if (!reduc.empty()) { assert(reduc.size() == forOp.getNumResults()); rewriter.create(loc, reduc); } // Exit the loop. rewriter.setInsertionPointAfter(forOp); // In-place update reduction variables. for (unsigned i = 0, e = forOp.getResults().size(); i < e; i++) reduc[i] = forOp.getResult(i); } else { auto parOp = llvm::cast(loopInfo.loop); if (!reduc.empty()) { assert(reduc.size() == parOp.getInitVals().size() && reduc.size() == 1); Operation *redExp = reduc.front().getDefiningOp(); // Reduction expression should have no use. assert(redExp->getUses().empty()); // This must be a binary operation. // NOTE: This is users' responsibilty to ensure the operation are // commutative. assert(redExp->getNumOperands() == 2 && redExp->getNumResults() == 1); Value redVal = parOp.getInitVals().front(); Value curVal; if (redExp->getOperand(0) == redVal) curVal = redExp->getOperand(1); else if (redExp->getOperand(1) == redVal) curVal = redExp->getOperand(0); // One of the operands must be the init value (which is also the // previous reduction value). assert(curVal); // The reduction expression should be the only user of the reduction val // inside the parallel for. unsigned numUsers = 0; for (Operation *op : redVal.getUsers()) { if (op->getParentOp() == parOp) numUsers++; } assert(numUsers == 1); (void)numUsers; // to silence unused variable warning in release build rewriter.setInsertionPointAfter(redExp); auto redOp = rewriter.create(loc, curVal); // Attach to the reduction op. Block *redBlock = &redOp.getRegion().getBlocks().front(); rewriter.setInsertionPointToEnd(redBlock); Operation *newRed = rewriter.clone(*redExp); // Replaces arguments of the reduction expression by using the block // arguments from scf.reduce. rewriter.updateRootInPlace( newRed, [&]() { newRed->setOperands(redBlock->getArguments()); }); // Erases the out-dated reduction expression. rewriter.eraseOp(redExp); rewriter.setInsertionPointToEnd(redBlock); rewriter.create(loc, newRed->getResult(0)); } rewriter.setInsertionPointAfter(parOp); // In-place update reduction variables. for (unsigned i = 0, e = parOp.getResults().size(); i < e; i++) reduc[i] = parOp.getResult(i); } // Finished iterating a tensor, clean up // We only do the clean up on for loop as while loops do not necessarily // finish the iteration on a sparse tensor for (auto [tid, dim] : llvm::zip(tids, dims)) { // Reset to null. coord[tid][dim] = Value(); pidxs[tid][dim] = Value(); // Dense dimension, high is fixed. if (!isDenseDLT(dimTypes[tid][dim])) highs[tid][dim] = Value(); } } void SparseTensorLoopEmitter::exitCoIterationLoop( OpBuilder &builder, Location loc, MutableArrayRef reduc) { auto whileOp = llvm::cast(loopStack.back().loop); auto &dims = loopStack.back().dims; auto &tids = loopStack.back().tids; Value iv = loopStack.back().iv; // Generation while loop induction at the end. builder.setInsertionPointToEnd(&whileOp.getAfter().front()); // Finalize the induction. Note that the induction could be performed // in the individual if-branches to avoid re-evaluating the conditions. 
  // However, that would result in a rather elaborate forest of yield
  // instructions during code generation. Moreover, performing the induction
  // after the if-statements more closely resembles code generated by TACO.
  unsigned o = 0;
  SmallVector<Value> operands;
  Value one = constantIndex(builder, loc, 1);
  for (auto [tid, dim] : llvm::zip(tids, dims)) {
    if (isCompressedDLT(dimTypes[tid][dim]) ||
        isSingletonDLT(dimTypes[tid][dim])) {
      Value op1 = coord[tid][dim];
      Value op3 = pidxs[tid][dim];
      Value cmp = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
                                                op1, iv);
      Value add = builder.create<arith::AddIOp>(loc, op3, one);
      operands.push_back(builder.create<arith::SelectOp>(loc, cmp, add, op3));
      // Following loops continue iteration from the break point of the
      // current while loop.
      pidxs[tid][dim] = whileOp->getResult(o++);
      // The coordinates are invalid now.
      coord[tid][dim] = nullptr;
      // The highs remain unchanged.
    }
  }

  // Reduction value from users.
  for (auto &i : reduc) {
    operands.push_back(i);
    // In-place update reduction variable.
    i = whileOp->getResult(o++);
  }

  // An (optional) universal index.
  if (operands.size() < whileOp.getNumResults()) {
    assert(operands.size() + 1 == whileOp.getNumResults());
    // The last one is the universal index.
    operands.push_back(builder.create<arith::AddIOp>(loc, iv, one));
    // Update the loop starting point of the current loop sequence.
    loopSeqStack.back() = whileOp->getResult(o++);
  }

  assert(o == operands.size());
  builder.create<scf::YieldOp>(loc, operands);
  builder.setInsertionPointAfter(whileOp);
}

void SparseTensorLoopEmitter::exitCurrentLoop(RewriterBase &rewriter,
                                              Location loc,
                                              MutableArrayRef<Value> reduc) {
  // Clean up the values; this helps to discover potential bugs at an earlier
  // stage (instead of silently using a wrong value).
  LoopLevelInfo &loopInfo = loopStack.back();
  assert(loopInfo.tids.size() == loopInfo.dims.size());
  if (llvm::isa<scf::WhileOp>(loopInfo.loop)) {
    exitCoIterationLoop(rewriter, loc, reduc);
  } else {
    exitForLoop(rewriter, loc, reduc);
  }

  assert(loopStack.size() == loopSeqStack.size());
  loopStack.pop_back();
}

//===----------------------------------------------------------------------===//
// ExecutionEngine/SparseTensorUtils helper functions.
//===----------------------------------------------------------------------===// OverheadType mlir::sparse_tensor::overheadTypeEncoding(unsigned width) { switch (width) { case 64: return OverheadType::kU64; case 32: return OverheadType::kU32; case 16: return OverheadType::kU16; case 8: return OverheadType::kU8; case 0: return OverheadType::kIndex; } llvm_unreachable("Unsupported overhead bitwidth"); } OverheadType mlir::sparse_tensor::overheadTypeEncoding(Type tp) { if (tp.isIndex()) return OverheadType::kIndex; if (auto intTp = tp.dyn_cast()) return overheadTypeEncoding(intTp.getWidth()); llvm_unreachable("Unknown overhead type"); } Type mlir::sparse_tensor::getOverheadType(Builder &builder, OverheadType ot) { switch (ot) { case OverheadType::kIndex: return builder.getIndexType(); case OverheadType::kU64: return builder.getIntegerType(64); case OverheadType::kU32: return builder.getIntegerType(32); case OverheadType::kU16: return builder.getIntegerType(16); case OverheadType::kU8: return builder.getIntegerType(8); } llvm_unreachable("Unknown OverheadType"); } OverheadType mlir::sparse_tensor::pointerOverheadTypeEncoding( const SparseTensorEncodingAttr &enc) { return overheadTypeEncoding(enc.getPointerBitWidth()); } OverheadType mlir::sparse_tensor::indexOverheadTypeEncoding( const SparseTensorEncodingAttr &enc) { return overheadTypeEncoding(enc.getIndexBitWidth()); } Type mlir::sparse_tensor::getPointerOverheadType( Builder &builder, const SparseTensorEncodingAttr &enc) { return getOverheadType(builder, pointerOverheadTypeEncoding(enc)); } Type mlir::sparse_tensor::getIndexOverheadType( Builder &builder, const SparseTensorEncodingAttr &enc) { return getOverheadType(builder, indexOverheadTypeEncoding(enc)); } // TODO: Adjust the naming convention for the constructors of // `OverheadType` so we can use the `MLIR_SPARSETENSOR_FOREVERY_O` x-macro // here instead of `MLIR_SPARSETENSOR_FOREVERY_FIXED_O`; to further reduce // the possibility of typo bugs or things getting out of sync. 
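// For example, OverheadType::kU32 maps to the suffix "32" and
// OverheadType::kIndex maps to "0"; callers append the suffix to a runtime
// function base name to select the properly typed entry point.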
StringRef mlir::sparse_tensor::overheadTypeFunctionSuffix(OverheadType ot) { switch (ot) { case OverheadType::kIndex: return "0"; #define CASE(ONAME, O) \ case OverheadType::kU##ONAME: \ return #ONAME; MLIR_SPARSETENSOR_FOREVERY_FIXED_O(CASE) #undef CASE } llvm_unreachable("Unknown OverheadType"); } StringRef mlir::sparse_tensor::overheadTypeFunctionSuffix(Type tp) { return overheadTypeFunctionSuffix(overheadTypeEncoding(tp)); } PrimaryType mlir::sparse_tensor::primaryTypeEncoding(Type elemTp) { if (elemTp.isF64()) return PrimaryType::kF64; if (elemTp.isF32()) return PrimaryType::kF32; if (elemTp.isF16()) return PrimaryType::kF16; if (elemTp.isBF16()) return PrimaryType::kBF16; if (elemTp.isInteger(64)) return PrimaryType::kI64; if (elemTp.isInteger(32)) return PrimaryType::kI32; if (elemTp.isInteger(16)) return PrimaryType::kI16; if (elemTp.isInteger(8)) return PrimaryType::kI8; if (auto complexTp = elemTp.dyn_cast()) { auto complexEltTp = complexTp.getElementType(); if (complexEltTp.isF64()) return PrimaryType::kC64; if (complexEltTp.isF32()) return PrimaryType::kC32; } llvm_unreachable("Unknown primary type"); } StringRef mlir::sparse_tensor::primaryTypeFunctionSuffix(PrimaryType pt) { switch (pt) { #define CASE(VNAME, V) \ case PrimaryType::k##VNAME: \ return #VNAME; MLIR_SPARSETENSOR_FOREVERY_V(CASE) #undef CASE } llvm_unreachable("Unknown PrimaryType"); } StringRef mlir::sparse_tensor::primaryTypeFunctionSuffix(Type elemTp) { return primaryTypeFunctionSuffix(primaryTypeEncoding(elemTp)); } //===----------------------------------------------------------------------===// // Misc code generators. //===----------------------------------------------------------------------===// mlir::Attribute mlir::sparse_tensor::getOneAttr(Builder &builder, Type tp) { if (tp.isa()) return builder.getFloatAttr(tp, 1.0); if (tp.isa()) return builder.getIndexAttr(1); if (auto intTp = tp.dyn_cast()) return builder.getIntegerAttr(tp, APInt(intTp.getWidth(), 1)); if (tp.isa()) { auto shapedTp = tp.cast(); if (auto one = getOneAttr(builder, shapedTp.getElementType())) return DenseElementsAttr::get(shapedTp, one); } llvm_unreachable("Unsupported attribute type"); } Value mlir::sparse_tensor::genIsNonzero(OpBuilder &builder, mlir::Location loc, Value v) { Type tp = v.getType(); Value zero = constantZero(builder, loc, tp); if (tp.isa()) return builder.create(loc, arith::CmpFPredicate::UNE, v, zero); if (tp.isIntOrIndex()) return builder.create(loc, arith::CmpIPredicate::ne, v, zero); if (tp.dyn_cast()) return builder.create(loc, v, zero); llvm_unreachable("Non-numeric type"); } void mlir::sparse_tensor::genReshapeDstShape( Location loc, PatternRewriter &rewriter, SmallVectorImpl &dstShape, ArrayRef srcShape, ArrayRef staticDstShape, ArrayRef reassociation) { // Collapse shape. if (reassociation.size() < srcShape.size()) { unsigned start = 0; for (const auto &map : llvm::enumerate(reassociation)) { auto dstDim = constantIndex(rewriter, loc, 1); for (unsigned i = start; i < start + map.value().size(); i++) { dstDim = rewriter.create(loc, dstDim, srcShape[i]); } dstShape.push_back(dstDim); start = start + map.value().size(); } assert(start == srcShape.size()); return; } // Expand shape. assert(reassociation.size() == srcShape.size()); unsigned start = 0; // Expand the i-th dimension in srcShape. for (unsigned i = 0, size = srcShape.size(); i < size; i++) { const auto &map = reassociation[i]; auto srcDim = srcShape[i]; // Iterate through dimensions expanded from the i-th dimension. 
for (unsigned j = start; j < start + map.size(); j++) { // There can be only one dynamic sized dimension among dimensions // expanded from the i-th dimension in srcShape. // For example, if srcDim = 8, then the expanded shape could be <2x?x2>, // but not <2x?x?>. if (staticDstShape[j] == ShapedType::kDynamic) { // The expanded dimension has dynamic size. We compute the dimension // by dividing srcDim by the product of the static dimensions. int64_t product = 1; for (unsigned k = start; k < start + map.size(); k++) { if (staticDstShape[k] != ShapedType::kDynamic) { product *= staticDstShape[k]; } } // Compute the dynamic dimension size. Value productVal = constantIndex(rewriter, loc, product); Value dynamicSize = rewriter.create(loc, srcDim, productVal); dstShape.push_back(dynamicSize); } else { // The expanded dimension is statically known. dstShape.push_back(constantIndex(rewriter, loc, staticDstShape[j])); } } start = start + map.size(); } assert(start == staticDstShape.size()); } void mlir::sparse_tensor::translateIndicesArray( OpBuilder &builder, Location loc, ArrayRef reassociation, ValueRange srcIndices, ArrayRef srcShape, ArrayRef dstShape, SmallVectorImpl &dstIndices) { unsigned i = 0; unsigned start = 0; unsigned dstRank = dstShape.size(); unsigned srcRank = srcShape.size(); assert(srcRank == srcIndices.size()); bool isCollapse = srcRank > dstRank; ArrayRef shape = isCollapse ? srcShape : dstShape; // Iterate over reassociation map. for (const auto &map : llvm::enumerate(reassociation)) { // Prepare strides information in dimension slice. Value linear = constantIndex(builder, loc, 1); for (unsigned j = start, end = start + map.value().size(); j < end; j++) { linear = builder.create(loc, linear, shape[j]); } // Start expansion. Value val; if (!isCollapse) val = srcIndices[i]; // Iterate over dimension slice. for (unsigned j = start, end = start + map.value().size(); j < end; j++) { linear = builder.create(loc, linear, shape[j]); if (isCollapse) { Value old = srcIndices[j]; Value mul = builder.create(loc, old, linear); val = val ? builder.create(loc, val, mul) : mul; } else { Value old = val; val = builder.create(loc, val, linear); assert(dstIndices.size() == j); dstIndices.push_back(val); val = builder.create(loc, old, linear); } } // Finalize collapse. 
    if (isCollapse) {
      assert(dstIndices.size() == i);
      dstIndices.push_back(val);
    }
    start += map.value().size();
    i++;
  }
  assert(dstIndices.size() == dstRank);
}

FlatSymbolRefAttr mlir::sparse_tensor::getFunc(ModuleOp module, StringRef name,
                                               TypeRange resultType,
                                               ValueRange operands,
                                               EmitCInterface emitCInterface) {
  MLIRContext *context = module.getContext();
  auto result = SymbolRefAttr::get(context, name);
  auto func = module.lookupSymbol<func::FuncOp>(result.getAttr());
  if (!func) {
    OpBuilder moduleBuilder(module.getBodyRegion());
    func = moduleBuilder.create<func::FuncOp>(
        module.getLoc(), name,
        FunctionType::get(context, operands.getTypes(), resultType));
    func.setPrivate();
    if (static_cast<bool>(emitCInterface))
      func->setAttr(LLVM::LLVMDialect::getEmitCWrapperAttrName(),
                    UnitAttr::get(context));
  }
  return result;
}

func::CallOp mlir::sparse_tensor::createFuncCall(
    OpBuilder &builder, Location loc, StringRef name, TypeRange resultType,
    ValueRange operands, EmitCInterface emitCInterface) {
  auto module = builder.getBlock()->getParentOp()->getParentOfType<ModuleOp>();
  FlatSymbolRefAttr fn =
      getFunc(module, name, resultType, operands, emitCInterface);
  return builder.create<func::CallOp>(loc, resultType, fn, operands);
}

Type mlir::sparse_tensor::getOpaquePointerType(OpBuilder &builder) {
  return LLVM::LLVMPointerType::get(builder.getI8Type());
}

Value mlir::sparse_tensor::genAlloca(OpBuilder &builder, Location loc,
                                     unsigned sz, Type tp) {
  return genAlloca(builder, loc, constantIndex(builder, loc, sz), tp);
}

Value mlir::sparse_tensor::genAlloca(OpBuilder &builder, Location loc, Value sz,
                                     Type tp) {
  auto memTp = MemRefType::get({ShapedType::kDynamic}, tp);
  return builder.create<memref::AllocaOp>(loc, memTp, ValueRange{sz});
}

Value mlir::sparse_tensor::genAllocaScalar(OpBuilder &builder, Location loc,
                                           Type tp) {
  return builder.create<memref::AllocaOp>(loc, MemRefType::get({}, tp));
}

Value mlir::sparse_tensor::allocDenseTensor(OpBuilder &builder, Location loc,
                                            RankedTensorType tensorTp,
                                            ValueRange sizes) {
  Type elemTp = tensorTp.getElementType();
  auto shape = tensorTp.getShape();
  auto memTp = MemRefType::get(shape, elemTp);
  SmallVector<Value> dynamicSizes;
  for (unsigned i = 0, rank = tensorTp.getRank(); i < rank; i++) {
    if (shape[i] == ShapedType::kDynamic)
      dynamicSizes.push_back(sizes[i]);
  }
  Value mem = builder.create<memref::AllocOp>(loc, memTp, dynamicSizes);
  Value zero = constantZero(builder, loc, elemTp);
  builder.create<linalg::FillOp>(loc, ValueRange{zero}, ValueRange{mem});
  return mem;
}

void mlir::sparse_tensor::deallocDenseTensor(OpBuilder &builder, Location loc,
                                             Value buffer) {
  builder.create<memref::DeallocOp>(loc, buffer);
}

Value mlir::sparse_tensor::genValueForDense(OpBuilder &builder, Location loc,
                                            Value tensor, ValueRange ivs) {
  Value val = builder.create<tensor::ExtractOp>(loc, tensor, ivs);
  Value cond = genIsNonzero(builder, loc, val);
  scf::IfOp ifOp = builder.create<scf::IfOp>(loc, cond, /*else*/ false);
  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
  return val;
}

// FIXME:
// 1. Dense tensors loop should be generated by loop emitter.
// 2. Support reduction variables to propagate SSA chains properly.
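
// Sketch of the loop structure emitted by the helper below (illustration
// only): for a sparse constant, a single loop runs over the number of stored
// entries; otherwise a rank-deep loop nest runs over the full dense index
// space, and the user-provided body ends up inside the `if (tensor[ivs] != 0)`
// guard created by genValueForDense:
//
//   scf.for %iv = %lo to %hi step %st {
//     ...
//       %val = <extract value and indices at ivs>
//       <bodyBuilder(val, indices)>   // inside the nonzero guard in the
//                                     // dense case
//   }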
void mlir::sparse_tensor::genDenseTensorOrSparseConstantIterLoop(
    OpBuilder &builder, Location loc, Value src, unsigned rank,
    function_ref<void(OpBuilder &, Location, Value, ValueRange)> bodyBuilder) {
  SmallVector<Value> indicesArray;
  SmallVector<Value> lo;
  SmallVector<Value> hi;
  SmallVector<Value> st;
  Value zero = constantIndex(builder, loc, 0);
  Value one = constantIndex(builder, loc, 1);
  auto indicesValues = genSplitSparseConstant(builder, loc, src);
  bool isCOOConstant = indicesValues.has_value();
  Value indices;
  Value values;
  if (isCOOConstant) {
    indices = indicesValues->first;
    values = indicesValues->second;
    lo.push_back(zero);
    hi.push_back(linalg::createOrFoldDimOp(builder, loc, values, 0));
    st.push_back(one);
  } else {
    for (unsigned i = 0; i < rank; i++) {
      lo.push_back(zero);
      hi.push_back(linalg::createOrFoldDimOp(builder, loc, src, i));
      st.push_back(one);
    }
  }

  scf::buildLoopNest(
      builder, loc, lo, hi, st, {},
      [&](OpBuilder &builder, Location loc, ValueRange ivs,
          ValueRange args) -> scf::ValueVector {
        Value val;
        if (isCOOConstant)
          val = genIndexAndValueForSparse(builder, loc, indices, values,
                                          indicesArray, ivs, rank);
        else
          val = genIndexAndValueForDense(builder, loc, src, indicesArray, ivs);
        bodyBuilder(builder, loc, val, indicesArray);
        return {};
      });
}

void mlir::sparse_tensor::sizesFromSrc(OpBuilder &builder,
                                       SmallVectorImpl<Value> &sizes,
                                       Location loc, Value src) {
  unsigned rank = src.getType().cast<ShapedType>().getRank();
  for (unsigned i = 0; i < rank; i++)
    sizes.push_back(linalg::createOrFoldDimOp(builder, loc, src, i));
}

Operation *mlir::sparse_tensor::getTop(Operation *op) {
  for (; isa<scf::ForOp>(op->getParentOp()) ||
         isa<scf::WhileOp>(op->getParentOp()) ||
         isa<scf::ParallelOp>(op->getParentOp()) ||
         isa<scf::IfOp>(op->getParentOp());
       op = op->getParentOp())
    ;
  return op;
}

void sparse_tensor::foreachInSparseConstant(
    Location loc, RewriterBase &rewriter, SparseElementsAttr attr,
    function_ref<void(ArrayRef<Value>, Value)> callback) {
  int64_t rank = attr.getType().getRank();
  // Foreach on constant.
  DenseElementsAttr indicesAttr = attr.getIndices();
  DenseElementsAttr valuesAttr = attr.getValues();

  SmallVector<Value> coords;
  for (int i = 0, e = valuesAttr.size(); i < e; i++) {
    coords.clear();
    for (int j = 0; j < rank; j++) {
      auto coordAttr = indicesAttr.getValues<IntegerAttr>()[i * rank + j];
      auto coord =
          rewriter.create<arith::ConstantIndexOp>(loc, coordAttr.getInt());
      // Remaps coordinates.
      coords.push_back(coord);
    }
    Value val;
    if (attr.getElementType().isa<ComplexType>()) {
      auto valAttr = valuesAttr.getValues<ArrayAttr>()[i];
      val = rewriter.create<complex::ConstantOp>(loc, attr.getElementType(),
                                                 valAttr);
    } else {
      auto valAttr = valuesAttr.getValues<TypedAttr>()[i];
      // Remaps value.
      val = rewriter.create<arith::ConstantOp>(loc, valAttr);
    }
    assert(val);
    callback(coords, val);
  }
}
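
// Illustrative sketch (editorial addition, not part of the upstream file):
// how a conversion pattern might use createFuncCall above to emit a call to a
// runtime entry point. The function name "hypotheticalRuntimeFn" and the
// operand list are placeholders for illustration only.
//
//   SmallVector<Value> params{srcPtr};
//   Type pTp = getOpaquePointerType(rewriter);
//   Value handle = createFuncCall(rewriter, loc, "hypotheticalRuntimeFn",
//                                 {pTp}, params, EmitCInterface::On)
//                      .getResult(0);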