//===- Lexer.h - MLIR PDLL Frontend Lexer -----------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LIB_TOOLS_PDLL_PARSER_LEXER_H_ #define LIB_TOOLS_PDLL_PARSER_LEXER_H_ #include "mlir/Support/LLVM.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/SMLoc.h" namespace llvm { class SourceMgr; } // namespace llvm namespace mlir { struct LogicalResult; namespace pdll { class CodeCompleteContext; namespace ast { class DiagnosticEngine; } // namespace ast //===----------------------------------------------------------------------===// // Token //===----------------------------------------------------------------------===// class Token { public: enum Kind { /// Markers. eof, error, /// Token signifying a code completion location. code_complete, /// Token signifying a code completion location within a string. code_complete_string, /// Keywords. KW_BEGIN, /// Dependent keywords, i.e. those that are treated as keywords depending on /// the current parser context. KW_DEPENDENT_BEGIN, kw_attr, kw_op, kw_type, KW_DEPENDENT_END, /// General keywords. kw_Attr, kw_erase, kw_let, kw_Constraint, kw_not, kw_Op, kw_OpName, kw_Pattern, kw_replace, kw_return, kw_rewrite, kw_Rewrite, kw_Type, kw_TypeRange, kw_Value, kw_ValueRange, kw_with, KW_END, /// Punctuation. arrow, colon, comma, dot, equal, equal_arrow, semicolon, /// Paired punctuation. less, greater, l_brace, r_brace, l_paren, r_paren, l_square, r_square, underscore, /// Tokens. directive, identifier, integer, string_block, string }; Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {} /// Given a token containing a string literal, return its value, including /// removing the quote characters and unescaping the contents of the string. std::string getStringValue() const; /// Returns true if the current token is a string literal. bool isString() const { return isAny(Token::string, Token::string_block); } /// Returns true if the current token is a keyword. bool isKeyword() const { return kind > Token::KW_BEGIN && kind < Token::KW_END; } /// Returns true if the current token is a keyword in a dependent context, and /// in any other situation (e.g. variable names) may be treated as an /// identifier. bool isDependentKeyword() const { return kind > Token::KW_DEPENDENT_BEGIN && kind < Token::KW_DEPENDENT_END; } /// Return the bytes that make up this token. StringRef getSpelling() const { return spelling; } /// Return the kind of this token. Kind getKind() const { return kind; } /// Return true if this token is one of the specified kinds. bool isAny(Kind k1, Kind k2) const { return is(k1) || is(k2); } template bool isAny(Kind k1, Kind k2, Kind k3, T... others) const { return is(k1) || isAny(k2, k3, others...); } /// Return if the token does not have the given kind. bool isNot(Kind k) const { return k != kind; } template bool isNot(Kind k1, Kind k2, T... others) const { return !isAny(k1, k2, others...); } /// Return if the token has the given kind. bool is(Kind k) const { return kind == k; } /// Return a location for the start of this token. SMLoc getStartLoc() const { return SMLoc::getFromPointer(spelling.data()); } /// Return a location at the end of this token. SMLoc getEndLoc() const { return SMLoc::getFromPointer(spelling.data() + spelling.size()); } /// Return a location for the range of this token. SMRange getLoc() const { return SMRange(getStartLoc(), getEndLoc()); } private: /// Discriminator that indicates the kind of token this is. Kind kind; /// A reference to the entire token contents; this is always a pointer into /// a memory buffer owned by the source manager. StringRef spelling; }; //===----------------------------------------------------------------------===// // Lexer //===----------------------------------------------------------------------===// class Lexer { public: Lexer(llvm::SourceMgr &mgr, ast::DiagnosticEngine &diagEngine, CodeCompleteContext *codeCompleteContext); ~Lexer(); /// Return a reference to the source manager used by the lexer. llvm::SourceMgr &getSourceMgr() { return srcMgr; } /// Return a reference to the diagnostic engine used by the lexer. ast::DiagnosticEngine &getDiagEngine() { return diagEngine; } /// Push an include of the given file. This will cause the lexer to start /// processing the provided file. Returns failure if the file could not be /// opened, success otherwise. LogicalResult pushInclude(StringRef filename, SMRange includeLoc); /// Lex the next token and return it. Token lexToken(); /// Change the position of the lexer cursor. The next token we lex will start /// at the designated point in the input. void resetPointer(const char *newPointer) { curPtr = newPointer; } /// Emit an error to the lexer with the given location and message. Token emitError(SMRange loc, const Twine &msg); Token emitError(const char *loc, const Twine &msg); Token emitErrorAndNote(SMRange loc, const Twine &msg, SMRange noteLoc, const Twine ¬e); private: Token formToken(Token::Kind kind, const char *tokStart) { return Token(kind, StringRef(tokStart, curPtr - tokStart)); } /// Return the next character in the stream. int getNextChar(); /// Lex methods. void lexComment(); Token lexDirective(const char *tokStart); Token lexIdentifier(const char *tokStart); Token lexNumber(const char *tokStart); Token lexString(const char *tokStart, bool isStringBlock); llvm::SourceMgr &srcMgr; int curBufferID; StringRef curBuffer; const char *curPtr; /// The engine used to emit diagnostics during lexing/parsing. ast::DiagnosticEngine &diagEngine; /// A flag indicating if we added a default diagnostic handler to the provided /// diagEngine. bool addedHandlerToDiagEngine; /// The optional code completion point within the input file. const char *codeCompletionLocation; }; } // namespace pdll } // namespace mlir #endif // LIB_TOOLS_PDLL_PARSER_LEXER_H_