//===- Lexer.h - MLIR PDLL Frontend Lexer -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LIB_TOOLS_PDLL_PARSER_LEXER_H_
#define LIB_TOOLS_PDLL_PARSER_LEXER_H_

#include "mlir/Support/LLVM.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/SMLoc.h"

namespace llvm {
class SourceMgr;
} // namespace llvm

namespace mlir {
struct LogicalResult;

namespace pdll {
class CodeCompleteContext;

namespace ast {
class DiagnosticEngine;
} // namespace ast

//===----------------------------------------------------------------------===//
// Token
//===----------------------------------------------------------------------===//

class Token {
public:
  /// Every kind of token the lexer can produce. The upper-case `KW_*` entries
  /// are range markers consumed by `isKeyword` / `isDependentKeyword`; their
  /// relative position in this list is therefore significant.
  enum Kind {
    /// Markers.
    eof,
    error,
    /// A code completion location.
    code_complete,
    /// A code completion location that falls inside of a string.
    code_complete_string,

    /// Keywords: everything strictly between KW_BEGIN and KW_END.
    KW_BEGIN,
    /// Dependent keywords: only treated as keywords in certain parser
    /// contexts. Everything strictly between KW_DEPENDENT_BEGIN and
    /// KW_DEPENDENT_END.
    KW_DEPENDENT_BEGIN,
    kw_attr,
    kw_op,
    kw_type,
    KW_DEPENDENT_END,

    /// General keywords.
    kw_Attr,
    kw_erase,
    kw_let,
    kw_Constraint,
    kw_not,
    kw_Op,
    kw_OpName,
    kw_Pattern,
    kw_replace,
    kw_return,
    kw_rewrite,
    kw_Rewrite,
    kw_Type,
    kw_TypeRange,
    kw_Value,
    kw_ValueRange,
    kw_with,
    KW_END,

    /// Punctuation.
    arrow,
    colon,
    comma,
    dot,
    equal,
    equal_arrow,
    semicolon,
    /// Paired punctuation.
    less,
    greater,
    l_brace,
    r_brace,
    l_paren,
    r_paren,
    l_square,
    r_square,
    /// Standalone punctuation (not part of a pair).
    underscore,

    /// Tokens.
    directive,
    identifier,
    integer,
    string_block,
    string
  };

  /// Build a token of the given `kind` whose text is `spelling`.
  Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {}

  /// For a string literal token, produce the literal's value: the surrounding
  /// quote characters are removed and the contents are unescaped.
  std::string getStringValue() const;

  /// Kind of this token.
  Kind getKind() const { return kind; }

  /// The bytes that make up this token.
  StringRef getSpelling() const { return spelling; }

  /// True when this token has exactly the kind `k`.
  bool is(Kind k) const { return kind == k; }

  /// True when this token matches at least one of the listed kinds.
  bool isAny(Kind k1, Kind k2) const { return kind == k1 || kind == k2; }
  template <typename... T>
  bool isAny(Kind k1, Kind k2, Kind k3, T... others) const {
    // Peel off the first candidate and defer the rest to the next overload.
    if (kind == k1)
      return true;
    return isAny(k2, k3, others...);
  }

  /// True when this token matches none of the listed kinds.
  bool isNot(Kind k) const { return !is(k); }
  template <typename... T>
  bool isNot(Kind k1, Kind k2, T... others) const {
    const bool matchesOne = isAny(k1, k2, others...);
    return !matchesOne;
  }

  /// True when this token is a string literal (regular or block form).
  bool isString() const {
    return kind == Token::string || kind == Token::string_block;
  }

  /// True when this token's kind lies strictly between the KW_BEGIN and
  /// KW_END markers.
  bool isKeyword() const {
    return Token::KW_BEGIN < kind && kind < Token::KW_END;
  }

  /// True when this token's kind lies strictly between the dependent keyword
  /// markers. Such tokens are keywords only in a dependent context; elsewhere
  /// (e.g. variable names) they may be treated as identifiers.
  bool isDependentKeyword() const {
    return Token::KW_DEPENDENT_BEGIN < kind && kind < Token::KW_DEPENDENT_END;
  }

  /// Location of the first byte of this token.
  SMLoc getStartLoc() const {
    const char *first = spelling.data();
    return SMLoc::getFromPointer(first);
  }
  /// Location one past the last byte of this token.
  SMLoc getEndLoc() const {
    const char *first = spelling.data();
    return SMLoc::getFromPointer(first + spelling.size());
  }
  /// Source range spanning this token, from its start to its end location.
  SMRange getLoc() const {
    SMLoc startLoc = getStartLoc();
    SMLoc endLoc = getEndLoc();
    return SMRange(startLoc, endLoc);
  }

private:
  /// Discriminator recording which kind of token this is.
  Kind kind;

  /// The full text of the token. This always points into a memory buffer
  /// owned by the source manager.
  StringRef spelling;
};

//===----------------------------------------------------------------------===//
// Lexer
//===----------------------------------------------------------------------===//

class Lexer {
public:
  /// Create a lexer from a source manager, a diagnostic engine, and a pointer
  /// to a code completion context.
  /// NOTE(review): the pointer is presumably allowed to be null when code
  /// completion is not requested -- confirm against the definition.
  Lexer(llvm::SourceMgr &mgr, ast::DiagnosticEngine &diagEngine,
        CodeCompleteContext *codeCompleteContext);
  ~Lexer();

  /// Access the source manager this lexer reads from.
  llvm::SourceMgr &getSourceMgr() { return srcMgr; }

  /// Access the diagnostic engine this lexer reports to.
  ast::DiagnosticEngine &getDiagEngine() { return diagEngine; }

  /// Push an include of `filename`, making the lexer start processing that
  /// file. Yields failure when the file could not be opened and success
  /// otherwise.
  LogicalResult pushInclude(StringRef filename, SMRange includeLoc);

  /// Lex and return the next token.
  Token lexToken();

  /// Move the lexer cursor: the next token lexed will begin at `newPointer`.
  void resetPointer(const char *newPointer) { this->curPtr = newPointer; }

  /// Emit an error with the given message at the given location, optionally
  /// accompanied by a note at a second location.
  Token emitError(SMRange loc, const Twine &msg);
  Token emitError(const char *loc, const Twine &msg);
  Token emitErrorAndNote(SMRange loc, const Twine &msg, SMRange noteLoc,
                         const Twine &note);

private:
  /// Build a token of the given kind whose spelling runs from `tokStart` up
  /// to (but not including) the current cursor position.
  Token formToken(Token::Kind kind, const char *tokStart) {
    const auto tokLength = curPtr - tokStart;
    StringRef tokSpelling(tokStart, tokLength);
    return Token(kind, tokSpelling);
  }

  /// Return the next character in the stream.
  int getNextChar();

  /// Lexing routines for the individual token categories. Those that produce
  /// a token take a pointer to where that token starts.
  void lexComment();
  Token lexDirective(const char *tokStart);
  Token lexIdentifier(const char *tokStart);
  Token lexNumber(const char *tokStart);
  Token lexString(const char *tokStart, bool isStringBlock);

  /// The source manager providing the input, together with the lexer's
  /// position state: a buffer id, a view of a buffer, and the cursor.
  llvm::SourceMgr &srcMgr;
  int curBufferID;
  StringRef curBuffer;
  const char *curPtr;

  /// Engine used to emit diagnostics during lexing/parsing.
  ast::DiagnosticEngine &diagEngine;

  /// Records whether a default diagnostic handler was added to the provided
  /// diagEngine.
  bool addedHandlerToDiagEngine;

  /// The optional code completion point within the input file.
  const char *codeCompletionLocation;
};
} // namespace pdll
} // namespace mlir

#endif // LIB_TOOLS_PDLL_PARSER_LEXER_H_