This commit updates how the `SparseTensorConversion` pass handles `NewOp`. It breaks up the underlying `openSparseTensor` function into two parts (`SparseTensorReader::create` and `SparseTensorReader::readSparseTensor`) so that the pass can inject code for constructing `lvlSizes` between those two parts. Migrating the construction of `lvlSizes` out of the runtime and into the pass is a necessary first step toward fully supporting non-permutations. (The alternative would be for the pass to generate a `FuncOp` for performing the construction and then pass that to the runtime, which doesn't seem to have any benefits over the design of this commit.) And since the pass now generates the code to call these two functions, this change also removes the `Action::kFromFile` value from the enum used by `_mlir_ciface_newSparseTensor`. Reviewed By: aartbik. Differential Revision: https://reviews.llvm.org/D138363
207 lines
7.3 KiB
C++
207 lines
7.3 KiB
C++
//===- File.cpp - Parsing sparse tensors from files -----------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements parsing and printing of files in one of the
|
|
// following external formats:
|
|
//
|
|
// (1) Matrix Market Exchange (MME): *.mtx
|
|
// https://math.nist.gov/MatrixMarket/formats.html
|
|
//
|
|
// (2) Formidable Repository of Open Sparse Tensors and Tools (FROSTT): *.tns
|
|
// http://frostt.io/tensors/file-formats.html
|
|
//
|
|
// This file is part of the lightweight runtime support library for sparse
|
|
// tensor manipulations. The functionality of the support library is meant
|
|
// to simplify benchmarking, testing, and debugging MLIR code operating on
|
|
// sparse tensors. However, the provided functionality is **not** part of
|
|
// core MLIR itself.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "mlir/ExecutionEngine/SparseTensor/File.h"

#include <cctype>
#include <cstdlib>
#include <cstring>
|
|
|
|
using namespace mlir::sparse_tensor;
|
|
|
|
/// Opens the file for reading.
|
|
void SparseTensorReader::openFile() {
|
|
if (file)
|
|
MLIR_SPARSETENSOR_FATAL("Already opened file %s\n", filename);
|
|
file = fopen(filename, "r");
|
|
if (!file)
|
|
MLIR_SPARSETENSOR_FATAL("Cannot find file %s\n", filename);
|
|
}
|
|
|
|
/// Closes the file.
|
|
void SparseTensorReader::closeFile() {
|
|
if (file) {
|
|
fclose(file);
|
|
file = nullptr;
|
|
}
|
|
}
|
|
|
|
/// Attempts to read a line from the file.
|
|
void SparseTensorReader::readLine() {
|
|
if (!fgets(line, kColWidth, file))
|
|
MLIR_SPARSETENSOR_FATAL("Cannot read next line of %s\n", filename);
|
|
}
|
|
|
|
char *SparseTensorReader::readCOOIndices(uint64_t *indices) {
|
|
readLine();
|
|
// Local variable for tracking the parser's position in the `line` buffer.
|
|
char *linePtr = line;
|
|
for (uint64_t rank = getRank(), r = 0; r < rank; ++r) {
|
|
// Parse the 1-based index.
|
|
uint64_t idx = strtoul(linePtr, &linePtr, 10);
|
|
// Store the 0-based index.
|
|
indices[r] = idx - 1;
|
|
}
|
|
return linePtr;
|
|
}
|
|
|
|
/// Reads and parses the file's header.
|
|
void SparseTensorReader::readHeader() {
|
|
assert(file && "Attempt to readHeader() before openFile()");
|
|
if (strstr(filename, ".mtx"))
|
|
readMMEHeader();
|
|
else if (strstr(filename, ".tns"))
|
|
readExtFROSTTHeader();
|
|
else
|
|
MLIR_SPARSETENSOR_FATAL("Unknown format %s\n", filename);
|
|
assert(isValid() && "Failed to read the header");
|
|
}
|
|
|
|
/// Asserts the shape subsumes the actual dimension sizes. Is only
|
|
/// valid after parsing the header.
|
|
void SparseTensorReader::assertMatchesShape(uint64_t rank,
|
|
const uint64_t *shape) const {
|
|
assert(rank == getRank() && "Rank mismatch");
|
|
for (uint64_t r = 0; r < rank; ++r)
|
|
assert((shape[r] == 0 || shape[r] == idata[2 + r]) &&
|
|
"Dimension size mismatch");
|
|
}
|
|
|
|
/// Returns whether values stored in the file, whose kind was recorded in
/// `valueKind_` while parsing the header, may be read as the primary type
/// `valTy`. Must be called after the header has been read.
bool SparseTensorReader::canReadAs(PrimaryType valTy) const {
  switch (valueKind_) {
  case ValueKind::kInvalid:
    assert(false && "Must readHeader() before calling canReadAs()");
    return false; // In case assertions are disabled.
  case ValueKind::kPattern:
    // Pattern files are readable as every primary type.
    return true;
  case ValueKind::kInteger:
    // Integer files may still be implicitly converted to floating
    // primary-types.
  case ValueKind::kUndefined:
    // The "extended" FROSTT format doesn't specify a ValueKind, so the
    // stored values may be implicitly converted to both integer and
    // floating primary-types.
    return isRealPrimaryType(valTy);
  case ValueKind::kReal:
    // Real/floating files disallow implicit conversion to integer
    // primary-types.
    return isFloatingPrimaryType(valTy);
  case ValueKind::kComplex:
    // Complex files require a complex primary-type.
    return isComplexPrimaryType(valTy);
  }
  MLIR_SPARSETENSOR_FATAL("Unknown ValueKind: %d\n",
                          static_cast<uint8_t>(valueKind_));
}
|
|
|
|
/// Helper to convert C-style strings (i.e., '\0' terminated) to lower case,
/// in place. `token` must be non-null and '\0' terminated.
static inline void toLower(char *token) {
  for (char *c = token; *c; ++c) {
    // `tolower` is only defined for values representable as `unsigned char`
    // (or EOF); passing a negative plain `char` is undefined behaviour, so
    // go through `unsigned char` first.
    *c = static_cast<char>(tolower(static_cast<unsigned char>(*c)));
  }
}
|
|
|
|
/// Returns true iff the two '\0'-terminated strings compare equal.
static inline bool streq(const char *lhs, const char *rhs) {
  const int ordering = strcmp(lhs, rhs);
  return ordering == 0;
}
|
|
|
|
/// Returns true iff the two '\0'-terminated strings differ.
static inline bool strne(const char *lhs, const char *rhs) {
  const int ordering = strcmp(lhs, rhs);
  return ordering != 0;
}
|
|
|
|
/// Read the MME header of a general sparse matrix of type real.
|
|
void SparseTensorReader::readMMEHeader() {
|
|
char header[64];
|
|
char object[64];
|
|
char format[64];
|
|
char field[64];
|
|
char symmetry[64];
|
|
// Read header line.
|
|
if (fscanf(file, "%63s %63s %63s %63s %63s\n", header, object, format, field,
|
|
symmetry) != 5)
|
|
MLIR_SPARSETENSOR_FATAL("Corrupt header in %s\n", filename);
|
|
// Convert all to lowercase up front (to avoid accidental redundancy).
|
|
toLower(header);
|
|
toLower(object);
|
|
toLower(format);
|
|
toLower(field);
|
|
toLower(symmetry);
|
|
// Process `field`, which specify pattern or the data type of the values.
|
|
if (streq(field, "pattern"))
|
|
valueKind_ = ValueKind::kPattern;
|
|
else if (streq(field, "real"))
|
|
valueKind_ = ValueKind::kReal;
|
|
else if (streq(field, "integer"))
|
|
valueKind_ = ValueKind::kInteger;
|
|
else if (streq(field, "complex"))
|
|
valueKind_ = ValueKind::kComplex;
|
|
else
|
|
MLIR_SPARSETENSOR_FATAL("Unexpected header field value in %s\n", filename);
|
|
// Set properties.
|
|
isSymmetric_ = streq(symmetry, "symmetric");
|
|
// Make sure this is a general sparse matrix.
|
|
if (strne(header, "%%matrixmarket") || strne(object, "matrix") ||
|
|
strne(format, "coordinate") ||
|
|
(strne(symmetry, "general") && !isSymmetric_))
|
|
MLIR_SPARSETENSOR_FATAL("Cannot find a general sparse matrix in %s\n",
|
|
filename);
|
|
// Skip comments.
|
|
while (true) {
|
|
readLine();
|
|
if (line[0] != '%')
|
|
break;
|
|
}
|
|
// Next line contains M N NNZ.
|
|
idata[0] = 2; // rank
|
|
if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n", idata + 2, idata + 3,
|
|
idata + 1) != 3)
|
|
MLIR_SPARSETENSOR_FATAL("Cannot find size in %s\n", filename);
|
|
}
|
|
|
|
/// Read the "extended" FROSTT header. Although not part of the documented
|
|
/// format, we assume that the file starts with optional comments followed
|
|
/// by two lines that define the rank, the number of nonzeros, and the
|
|
/// dimensions sizes (one per rank) of the sparse tensor.
|
|
void SparseTensorReader::readExtFROSTTHeader() {
|
|
// Skip comments.
|
|
while (true) {
|
|
readLine();
|
|
if (line[0] != '#')
|
|
break;
|
|
}
|
|
// Next line contains RANK and NNZ.
|
|
if (sscanf(line, "%" PRIu64 "%" PRIu64 "\n", idata, idata + 1) != 2)
|
|
MLIR_SPARSETENSOR_FATAL("Cannot find metadata in %s\n", filename);
|
|
// Followed by a line with the dimension sizes (one per rank).
|
|
for (uint64_t r = 0; r < idata[0]; ++r)
|
|
if (fscanf(file, "%" PRIu64, idata + 2 + r) != 1)
|
|
MLIR_SPARSETENSOR_FATAL("Cannot find dimension size %s\n", filename);
|
|
readLine(); // end of line
|
|
// The FROSTT format does not define the data type of the nonzero elements.
|
|
valueKind_ = ValueKind::kUndefined;
|
|
}
|