From 9b9f7ff5d4eeeb4172241997e4e3752dfe701f6c Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Fri, 22 Jun 2018 10:39:19 -0700 Subject: [PATCH] Implement enough of a lexer and parser for MLIR to parse extfunc's without arguments. PiperOrigin-RevId: 201706570 --- mlir/include/mlir/Parser.h | 38 ++++++ mlir/lib/Parser/Lexer.cpp | 137 ++++++++++++++++++++ mlir/lib/Parser/Lexer.h | 65 ++++++++++ mlir/lib/Parser/Parser.cpp | 186 ++++++++++++++++++++++++++++ mlir/lib/Parser/Token.cpp | 37 ++++++ mlir/lib/Parser/Token.h | 98 +++++++++++++++ mlir/test/IR/check-help-output.mlir | 20 +-- mlir/test/IR/parser.mlir | 15 +++ mlir/tools/mlir-opt/mlir-opt.cpp | 26 +++- 9 files changed, 603 insertions(+), 19 deletions(-) create mode 100644 mlir/include/mlir/Parser.h create mode 100644 mlir/lib/Parser/Lexer.cpp create mode 100644 mlir/lib/Parser/Lexer.h create mode 100644 mlir/lib/Parser/Parser.cpp create mode 100644 mlir/lib/Parser/Token.cpp create mode 100644 mlir/lib/Parser/Token.h create mode 100644 mlir/test/IR/parser.mlir diff --git a/mlir/include/mlir/Parser.h b/mlir/include/mlir/Parser.h new file mode 100644 index 000000000000..cb5f1c0e6287 --- /dev/null +++ b/mlir/include/mlir/Parser.h @@ -0,0 +1,38 @@ +//===- Parser.h - MLIR Parser Library Interface -----------------*- C++ -*-===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// ============================================================================= +// +// This file contains the interface to the MLIR parser library. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_PARSER_H +#define MLIR_PARSER_H + +namespace llvm { + class SourceMgr; +} + +namespace mlir { +class Module; + +/// This parses the file specified by the indicated SourceMgr and returns an +/// MLIR module if it was valid. If not, it emits diagnostics and returns null. +Module *parseSourceFile(llvm::SourceMgr &sourceMgr); + +} // end namespace mlir + +#endif // MLIR_PARSER_H diff --git a/mlir/lib/Parser/Lexer.cpp b/mlir/lib/Parser/Lexer.cpp new file mode 100644 index 000000000000..5958658b797a --- /dev/null +++ b/mlir/lib/Parser/Lexer.cpp @@ -0,0 +1,137 @@ +//===- Lexer.cpp - MLIR Lexer Implementation ------------------------------===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +// +// This file implements the lexer for the MLIR textual form. 
+// +//===----------------------------------------------------------------------===// + +#include "Lexer.h" +#include "llvm/Support/SourceMgr.h" +using namespace mlir; +using llvm::SMLoc; +using llvm::SourceMgr; + +Lexer::Lexer(llvm::SourceMgr &sourceMgr) : sourceMgr(sourceMgr) { + auto bufferID = sourceMgr.getMainFileID(); + curBuffer = sourceMgr.getMemoryBuffer(bufferID)->getBuffer(); + curPtr = curBuffer.begin(); +} + +/// emitError - Emit an error message and return a Token::error token. +Token Lexer::emitError(const char *loc, const Twine &message) { + // TODO(clattner): If/when we want to implement a -verify mode, this will need + // to package up errors into SMDiagnostic and report them. + sourceMgr.PrintMessage(SMLoc::getFromPointer(loc), SourceMgr::DK_Error, + message); + return formToken(Token::error, loc); +} + +Token Lexer::lexToken() { + const char *tokStart = curPtr; + + switch (*curPtr++) { + default: + // Handle bare identifiers. + if (isalpha(curPtr[-1])) + return lexBareIdentifierOrKeyword(tokStart); + + // Unknown character, emit an error. + return emitError(tokStart, "unexpected character"); + + case 0: + // This may either be a nul character in the source file or may be the EOF + // marker that llvm::MemoryBuffer guarantees will be there. + if (curPtr-1 == curBuffer.end()) + return formToken(Token::eof, tokStart); + + LLVM_FALLTHROUGH; + case ' ': + case '\t': + case '\n': + case '\r': + // Ignore whitespace. + return lexToken(); + + case '(': return formToken(Token::l_paren, tokStart); + case ')': return formToken(Token::r_paren, tokStart); + case '<': return formToken(Token::less, tokStart); + case '>': return formToken(Token::greater, tokStart); + + case ';': return lexComment(); + case '@': return lexAtIdentifier(tokStart); + } +} + +/// Lex a comment line, starting with a semicolon. +/// +/// TODO: add a regex for comments here and to the spec. 
+/// +Token Lexer::lexComment() { + while (true) { + switch (*curPtr++) { + case '\n': + case '\r': + // Newline is end of comment. + return lexToken(); + case 0: + // If this is the end of the buffer, end the comment. + if (curPtr-1 == curBuffer.end()) { + --curPtr; + return lexToken(); + } + LLVM_FALLTHROUGH; + default: + // Skip over other characters. + break; + } + } +} + +/// Lex a bare identifier or keyword that starts with a letter. +/// +/// bare-id ::= letter (letter|digit)* +/// +Token Lexer::lexBareIdentifierOrKeyword(const char *tokStart) { + // Match the rest of the identifier regex: [0-9a-zA-Z]* + while (isalpha(*curPtr) || isdigit(*curPtr)) + ++curPtr; + + // Check to see if this identifier is a keyword. + StringRef spelling(tokStart, curPtr-tokStart); + + Token::TokenKind kind = llvm::StringSwitch<Token::TokenKind>(spelling) + .Case("cfgfunc", Token::kw_cfgfunc) + .Case("extfunc", Token::kw_extfunc) + .Case("mlfunc", Token::kw_mlfunc) + .Default(Token::bare_identifier); + + return Token(kind, spelling); +} + +/// Lex an '@foo' identifier. +/// +/// function-id ::= `@` bare-id +/// +Token Lexer::lexAtIdentifier(const char *tokStart) { + // These always start with a letter. + if (!isalpha(*curPtr++)) + return emitError(curPtr-1, "expected letter in @ identifier"); + + while (isalpha(*curPtr) || isdigit(*curPtr)) + ++curPtr; + return formToken(Token::at_identifier, tokStart); +} diff --git a/mlir/lib/Parser/Lexer.h b/mlir/lib/Parser/Lexer.h new file mode 100644 index 000000000000..5886c5c387ec --- /dev/null +++ b/mlir/lib/Parser/Lexer.h @@ -0,0 +1,65 @@ +//===- Lexer.h - MLIR Lexer Interface ---------------------------*- C++ -*-===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +// +// This file declares the MLIR Lexer class. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_LIB_PARSER_LEXER_H +#define MLIR_LIB_PARSER_LEXER_H + +#include "Token.h" + +namespace llvm { + class SourceMgr; +} + +namespace mlir { + +/// This class breaks up the current file into a token stream. +class Lexer { + llvm::SourceMgr &sourceMgr; + + StringRef curBuffer; + const char *curPtr; + + Lexer(const Lexer&) = delete; + void operator=(const Lexer&) = delete; +public: + explicit Lexer(llvm::SourceMgr &sourceMgr); + + llvm::SourceMgr &getSourceMgr() { return sourceMgr; } + + Token lexToken(); + +private: + // Helpers. + Token formToken(Token::TokenKind kind, const char *tokStart) { + return Token(kind, StringRef(tokStart, curPtr-tokStart)); + } + + Token emitError(const char *loc, const Twine &message); + + // Lexer implementation methods. + Token lexComment(); + Token lexBareIdentifierOrKeyword(const char *tokStart); + Token lexAtIdentifier(const char *tokStart); +}; + +} // end namespace mlir + +#endif // MLIR_LIB_PARSER_LEXER_H diff --git a/mlir/lib/Parser/Parser.cpp b/mlir/lib/Parser/Parser.cpp new file mode 100644 index 000000000000..abad611d8468 --- /dev/null +++ b/mlir/lib/Parser/Parser.cpp @@ -0,0 +1,186 @@ +//===- Parser.cpp - MLIR Parser Implementation ----------------------------===// +// +// Copyright 2019 The MLIR Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +// +// This file implements the parser for the MLIR textual form. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Parser.h" +#include "Lexer.h" +#include "mlir/IR/Module.h" +#include "llvm/Support/SourceMgr.h" +using namespace mlir; +using llvm::SourceMgr; + +namespace { +/// Simple enum to make code read better. Failure is "true" in a boolean +/// context. +enum ParseResult { + ParseSuccess, + ParseFailure +}; + +/// Main parser implementation. +class Parser { + public: + Parser(llvm::SourceMgr &sourceMgr) : lex(sourceMgr), curToken(lex.lexToken()){ + module.reset(new Module()); + } + + Module *parseModule(); +private: + // State. + Lexer lex; + + // This is the next token that hasn't been consumed yet. + Token curToken; + + // This is the result module we are parsing into. + std::unique_ptr<Module> module; + +private: + // Helper methods. + + /// Emit an error and return failure. + ParseResult emitError(const Twine &message); + + /// Advance the current lexer onto the next token. + void consumeToken() { + assert(curToken.isNot(Token::eof, Token::error) && + "shouldn't advance past EOF or errors"); + curToken = lex.lexToken(); + } + + /// Advance the current lexer onto the next token, asserting what the expected + /// current token is. 
This is preferred to the above method because it leads + /// to more self-documenting code with better checking. + void consumeToken(Token::TokenKind kind) { + assert(curToken.is(kind) && "consumed an unexpected token"); + consumeToken(); + } + + // Type parsing. + + // Top level entity parsing. + ParseResult parseFunctionSignature(StringRef &name); + ParseResult parseExtFunc(); +}; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Helper methods. +//===----------------------------------------------------------------------===// + +ParseResult Parser::emitError(const Twine &message) { + // TODO(clattner): If/when we want to implement a -verify mode, this will need + // to package up errors into SMDiagnostic and report them. + lex.getSourceMgr().PrintMessage(curToken.getLoc(), SourceMgr::DK_Error, + message); + return ParseFailure; +} + + +//===----------------------------------------------------------------------===// +// Type Parsing +//===----------------------------------------------------------------------===// + +// ... TODO + +//===----------------------------------------------------------------------===// +// Top-level entity parsing. +//===----------------------------------------------------------------------===// + +/// Parse a function signature, starting with a name and including the parameter +/// list. +/// +/// argument-list ::= type (`,` type)* | /*empty*/ +/// function-signature ::= function-id `(` argument-list `)` (`->` type-list)? +/// +ParseResult Parser::parseFunctionSignature(StringRef &name) { + if (curToken.isNot(Token::at_identifier)) + return emitError("expected a function identifier like '@foo'"); + + name = curToken.getSpelling().drop_front(); + consumeToken(Token::at_identifier); + + if (curToken.isNot(Token::l_paren)) + return emitError("expected '(' in function signature"); + consumeToken(Token::l_paren); + + // TODO: This should actually parse the full grammar here. 
+ + if (curToken.isNot(Token::r_paren)) + return emitError("expected ')' in function signature"); + consumeToken(Token::r_paren); + + return ParseSuccess; +} + + +/// External function declarations. +/// +/// ext-func ::= `extfunc` function-signature +/// +ParseResult Parser::parseExtFunc() { + consumeToken(Token::kw_extfunc); + + StringRef name; + if (parseFunctionSignature(name)) + return ParseFailure; + + + // Okay, the external function definition was parsed correctly. + module->functionList.push_back(new Function(name)); + return ParseSuccess; +} + + +/// This is the top-level module parser. +Module *Parser::parseModule() { + while (1) { + switch (curToken.getKind()) { + default: + emitError("expected a top level entity"); + return nullptr; + + // If we got to the end of the file, then we're done. + case Token::eof: + return module.release(); + + // If we got an error token, then the lexer already emitted an error, just + // stop. Someday we could introduce error recovery if there was demand for + // it. + case Token::error: + return nullptr; + + case Token::kw_extfunc: + if (parseExtFunc()) + return nullptr; + break; + + // TODO: cfgfunc, mlfunc, affine entity declarations, etc. + } + } +} + +//===----------------------------------------------------------------------===// + +/// This parses the file specified by the indicated SourceMgr and returns an +/// MLIR module if it was valid. If not, it emits diagnostics and returns null. +Module *mlir::parseSourceFile(llvm::SourceMgr &sourceMgr) { + return Parser(sourceMgr).parseModule(); +} diff --git a/mlir/lib/Parser/Token.cpp b/mlir/lib/Parser/Token.cpp new file mode 100644 index 000000000000..551bd1e1da64 --- /dev/null +++ b/mlir/lib/Parser/Token.cpp @@ -0,0 +1,37 @@ +//===- Token.cpp - MLIR Token Implementation ------------------------------===// +// +// Copyright 2019 The MLIR Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +// +// This file implements the Token class for the MLIR textual form. +// +//===----------------------------------------------------------------------===// + +#include "Token.h" +using namespace mlir; +using llvm::SMLoc; +using llvm::SMRange; + +SMLoc Token::getLoc() const { + return SMLoc::getFromPointer(spelling.data()); +} + +SMLoc Token::getEndLoc() const { + return SMLoc::getFromPointer(spelling.data() + spelling.size()); +} + +SMRange Token::getLocRange() const { + return SMRange(getLoc(), getEndLoc()); +} diff --git a/mlir/lib/Parser/Token.h b/mlir/lib/Parser/Token.h new file mode 100644 index 000000000000..03c967e4cf3e --- /dev/null +++ b/mlir/lib/Parser/Token.h @@ -0,0 +1,98 @@ +//===- Token.h - MLIR Token Interface ---------------------------*- C++ -*-===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= + +#ifndef MLIR_LIB_PARSER_TOKEN_H +#define MLIR_LIB_PARSER_TOKEN_H + +#include "mlir/Support/LLVM.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/SMLoc.h" + +namespace mlir { + +/// This represents a token in the MLIR syntax. +class Token { +public: + enum TokenKind { + // Markers + eof, error, + + // Identifiers. + bare_identifier, // foo + at_identifier, // @foo + // TODO: @@foo, etc. + + // Punctuation. + l_paren, r_paren, // ( ) + less, greater, // < > + // TODO: More punctuation. + + // Keywords. + kw_cfgfunc, + kw_extfunc, + kw_mlfunc, + // TODO: More keywords. + }; + + Token(TokenKind kind, StringRef spelling) + : kind(kind), spelling(spelling) {} + + // Return the bytes that make up this token. + StringRef getSpelling() const { return spelling; } + + // Token classification. + TokenKind getKind() const { return kind; } + bool is(TokenKind K) const { return kind == K; } + + bool isAny(TokenKind k1, TokenKind k2) const { + return is(k1) || is(k2); + } + + /// Return true if this token is one of the specified kinds. + template <typename ...T> + bool isAny(TokenKind k1, TokenKind k2, TokenKind k3, T... others) const { + if (is(k1)) + return true; + return isAny(k2, k3, others...); + } + + bool isNot(TokenKind k) const { return kind != k; } + + /// Return true if this token isn't one of the specified kinds. + template <typename ...T> + bool isNot(TokenKind k1, TokenKind k2, T... others) const { + return !isAny(k1, k2, others...); + } + + + /// Location processing. + llvm::SMLoc getLoc() const; + llvm::SMLoc getEndLoc() const; + llvm::SMRange getLocRange() const; + +private: + /// Discriminator that indicates the sort of token this is. + TokenKind kind; + + /// A reference to the entire token contents; this is always a pointer into + /// a memory buffer owned by the source manager. 
+ StringRef spelling; +}; + +} // end namespace mlir + +#endif // MLIR_LIB_PARSER_TOKEN_H diff --git a/mlir/test/IR/check-help-output.mlir b/mlir/test/IR/check-help-output.mlir index 3a624149e4da..617ae788ccd0 100644 --- a/mlir/test/IR/check-help-output.mlir +++ b/mlir/test/IR/check-help-output.mlir @@ -1,15 +1,7 @@ -// TODO(andydavis) Resolve relative path issue w.r.t invoking mlir-opt in RUN -// statements (perhaps through using lit config substitutions). -// -// RUN: %S/../../mlir-opt --help | FileCheck --check-prefix=CHECKHELP %s -// RUN: %S/../../mlir-opt %s -o - | FileCheck %s -// -// CHECKHELP: OVERVIEW: MLIR modular optimizer driver +; TODO(andydavis) Resolve relative path issue w.r.t invoking mlir-opt in RUN +; statements (perhaps through using lit config substitutions). +; +; RUN: %S/../../mlir-opt --help | FileCheck %s +; +; CHECK: OVERVIEW: MLIR modular optimizer driver - -// Right now the input is completely ignored. -extfunc @foo() -extfunc @bar() - -// CHECK: extfunc @foo() -// CHECK: extfunc @bar() diff --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir new file mode 100644 index 000000000000..21b6a040c8cf --- /dev/null +++ b/mlir/test/IR/parser.mlir @@ -0,0 +1,15 @@ +; TODO(andydavis) Resolve relative path issue w.r.t invoking mlir-opt in RUN +; statements (perhaps through using lit config substitutions). 
+; +; RUN: %S/../../mlir-opt %s -o - | FileCheck %s + + +; CHECK: extfunc @foo() +extfunc @foo() + +; CHECK: extfunc @bar() +extfunc @bar() + +; CHECK: extfunc @baz() +extfunc @baz() + diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index dee86ed2624b..b5a548dd2116 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -22,7 +22,9 @@ //===----------------------------------------------------------------------===// #include "mlir/IR/Module.h" +#include "mlir/Parser.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/ToolOutputFile.h" @@ -56,13 +58,27 @@ int main(int argc, char **argv) { cl::ParseCommandLineOptions(argc, argv, "MLIR modular optimizer driver\n"); - // Instantiate an IR object. - Module m; - m.functionList.push_back(new Function("foo")); - m.functionList.push_back(new Function("bar")); + // Set up the input file. + auto fileOrErr = MemoryBuffer::getFileOrSTDIN(inputFilename); + if (std::error_code error = fileOrErr.getError()) { + llvm::errs() << argv[0] << ": could not open input file '" << inputFilename + << "': " << error.message() << "\n"; + return 1; + } + + // Tell sourceMgr about this buffer, which is what the parser will pick up. + SourceMgr sourceMgr; + sourceMgr.AddNewSourceBuffer(std::move(*fileOrErr), SMLoc()); + + // Parse the input file and emit any errors. + std::unique_ptr<Module> module(parseSourceFile(sourceMgr)); + if (!module) return 1; // Print the output. auto output = getOutputStream(); - m.print(output->os()); + module->print(output->os()); output->keep(); + + // Success. + return 0; }