#include "bolt/CST.hpp"
#include "bolt/Scanner.hpp"
#include "bolt/Parser.hpp"
#include "bolt/Diagnostics.hpp"

// NOTE(review): standard headers for the std:: names used directly in this
// file; confirm against the project's intended include list.
#include <cstddef>
#include <optional>
#include <string>
#include <utility>
#include <vector>

namespace bolt {

/// Look up the infix-operator entry registered under the text of \p T.
///
/// \returns The stored OperatorInfo when the text is registered and the entry
///          reports isInfix(); an empty optional otherwise.
std::optional<OperatorInfo> OperatorTable::getInfix(Token* T) {
  auto Match = Mapping.find(T->getText());
  if (Match == Mapping.end() || !Match->second.isInfix()) {
    return {};
  }
  return Match->second;
}

/// \returns true when the text of \p T is registered and its entry reports
///          isInfix().
bool OperatorTable::isInfix(Token* T) {
  auto Match = Mapping.find(T->getText());
  return Match != Mapping.end() && Match->second.isInfix();
}

/// \returns true when the text of \p T is registered and its entry reports
///          isPrefix().
bool OperatorTable::isPrefix(Token* T) {
  auto Match = Mapping.find(T->getText());
  return Match != Mapping.end() && Match->second.isPrefix();
}

/// \returns true when the text of \p T is registered and its entry reports
///          isSuffix().
bool OperatorTable::isSuffix(Token* T) {
  auto Match = Mapping.find(T->getText());
  return Match != Mapping.end() && Match->second.isSuffix();
}

/// Register an operator under \p Name with the given \p Flags and
/// \p Precedence.
void OperatorTable::add(std::string Name, unsigned Flags, int Precedence) {
  // Name was taken by value, so hand the buffer over instead of copying it.
  Mapping.emplace(std::move(Name), OperatorInfo { Precedence, Flags });
}

/// Construct a parser reading tokens from \p S for the text file \p File and
/// populate the expression operator table.
///
/// A larger precedence number binds tighter (see
/// parseInfixOperatorAfterExpression()).
Parser::Parser(TextFile& File, Stream<Token*>& S):
  File(File), Tokens(S) {
    ExprOperators.add("**", OperatorFlags_InfixR, 10);
    ExprOperators.add("*", OperatorFlags_InfixL, 5);
    ExprOperators.add("/", OperatorFlags_InfixL, 5);
    ExprOperators.add("+", OperatorFlags_InfixL, 4);
    ExprOperators.add("-", OperatorFlags_InfixL, 4);
    ExprOperators.add("<", OperatorFlags_InfixL, 3);
    ExprOperators.add(">", OperatorFlags_InfixL, 3);
    ExprOperators.add("<=", OperatorFlags_InfixL, 3);
    ExprOperators.add(">=", OperatorFlags_InfixL, 3);
    ExprOperators.add("==", OperatorFlags_InfixL, 3);
    ExprOperators.add("!=", OperatorFlags_InfixL, 3);
    ExprOperators.add(":", OperatorFlags_InfixL, 2);
    ExprOperators.add("<|>", OperatorFlags_InfixL, 1);
    ExprOperators.add("$", OperatorFlags_InfixR, 0);
}

/// Peek past any leading `pub` / `mut` keyword tokens and return the first
/// token of another type.
///
/// Only Tokens.peek() is used, so nothing is taken from the stream here.
Token* Parser::peekFirstTokenAfterModifiers() {
  std::size_t I = 0;
  for (;;) {
    auto T0 = Tokens.peek(I++);
    switch (T0->Type) {
      case NodeType::PubKeyword:
      case NodeType::MutKeyword:
        continue;
      default:
        return T0;
    }
  }
}

/// Take the next token from the stream and require it to be of type \p Type.
///
/// \returns The token that was taken.
/// \throws UnexpectedTokenDiagnostic when the token has a different type.
Token* Parser::expectToken(NodeType Type) {
  auto T = Tokens.get();
  if (T->Type != Type) {
    throw UnexpectedTokenDiagnostic(File, T, std::vector<NodeType> { Type });
  }
  return T;
}

/// Parse a pattern. Only an identifier is accepted, producing a BindPattern.
///
/// \throws UnexpectedTokenDiagnostic when the next token is not an identifier
///         (the offending token is only peeked, not taken).
Pattern* Parser::parsePattern() {
  auto T0 = Tokens.peek();
  switch (T0->Type) {
    case NodeType::Identifier:
      Tokens.get();
      return new BindPattern(static_cast<Identifier*>(T0));
    default:
      throw UnexpectedTokenDiagnostic(File, T0, std::vector<NodeType> { NodeType::Identifier });
  }
}

/// Parse a dot-separated name such as `A.B.c`.
///
/// Every identifier that is followed by a dot goes into the module path; the
/// final identifier becomes the name.
///
/// \throws UnexpectedTokenDiagnostic when an identifier is missing at the
///         start or after a dot.
QualifiedName* Parser::parseQualifiedName() {
  std::vector<Identifier*> ModulePath;
  auto Name = expectToken(NodeType::Identifier);
  while (Tokens.peek()->Type == NodeType::Dot) {
    Tokens.get();
    ModulePath.push_back(static_cast<Identifier*>(Name));
    Name = expectToken(NodeType::Identifier);
  }
  return new QualifiedName(ModulePath, static_cast<Identifier*>(Name));
}

/// Parse a type expression that is not an arrow: currently a reference to a
/// (qualified) name.
///
/// \throws UnexpectedTokenDiagnostic when the next token is not an identifier.
TypeExpression* Parser::parsePrimitiveTypeExpression() {
  auto T0 = Tokens.peek();
  switch (T0->Type) {
    case NodeType::Identifier:
      return new ReferenceTypeExpression(parseQualifiedName());
    default:
      throw UnexpectedTokenDiagnostic(File, T0, std::vector<NodeType> { NodeType::Identifier });
  }
}

/// Parse a type expression, including arrow chains.
///
/// For a chain of primitive types separated by RArrow tokens, every type but
/// the last becomes a parameter type and the last one is the return type.
/// Without any arrow the single primitive type is returned unchanged.
TypeExpression* Parser::parseTypeExpression() {
  auto RetType = parsePrimitiveTypeExpression();
  std::vector<TypeExpression*> ParamTypes;
  while (Tokens.peek()->Type == NodeType::RArrow) {
    Tokens.get();
    // What we took for the return type turned out to be a parameter.
    ParamTypes.push_back(RetType);
    RetType = parsePrimitiveTypeExpression();
  }
  if (!ParamTypes.empty()) {
    return new ArrowTypeExpression(ParamTypes, RetType);
  }
  return RetType;
}

/// Parse an operand: a (qualified) reference, an integer literal or a string
/// literal.
///
/// \throws UnexpectedTokenDiagnostic when the next token starts none of these.
Expression* Parser::parsePrimitiveExpression() {
  auto T0 = Tokens.peek();
  switch (T0->Type) {
    case NodeType::Identifier:
      return new ReferenceExpression(parseQualifiedName());
    case NodeType::IntegerLiteral:
    case NodeType::StringLiteral:
      Tokens.get();
      return new ConstantExpression(T0);
    default:
      throw UnexpectedTokenDiagnostic(
        File,
        T0,
        std::vector<NodeType> { NodeType::Identifier, NodeType::IntegerLiteral, NodeType::StringLiteral }
      );
  }
}

/// Parse a primitive expression followed by zero or more primitive arguments.
///
/// Arguments are collected until a LineFoldEnd, a BlockStart or an infix
/// operator is seen. With no arguments the callee expression itself is
/// returned rather than a CallExpression.
Expression* Parser::parseCallExpression() {
  auto Operator = parsePrimitiveExpression();
  std::vector<Expression*> Args;
  for (;;) {
    auto T1 = Tokens.peek();
    const bool EndsCall =
         T1->Type == NodeType::LineFoldEnd
      || T1->Type == NodeType::BlockStart
      || ExprOperators.isInfix(T1);
    if (EndsCall) {
      break;
    }
    Args.push_back(parsePrimitiveExpression());
  }
  if (Args.empty()) {
    return Operator;
  }
  return new CallExpression(Operator, Args);
}

/// Parse any number of prefix operators followed by a call expression.
///
/// The operators are applied innermost-last: for `op1 op2 e` the result is
/// `UnaryExpression(op1, UnaryExpression(op2, e))`.
Expression* Parser::parseUnaryExpression() {
  std::vector<Token*> Prefix;
  for (;;) {
    auto T0 = Tokens.peek();
    if (!ExprOperators.isPrefix(T0)) {
      break;
    }
    Tokens.get();
    Prefix.push_back(T0);
  }
  auto E = parseCallExpression();
  // Wrap from the operator closest to the operand outwards.
  for (auto Iter = Prefix.rbegin(); Iter != Prefix.rend(); ++Iter) {
    E = new UnaryExpression(*Iter, E);
  }
  return E;
}

/// Precedence-climbing loop: extend \p LHS with every following infix
/// operator whose precedence is at least \p MinPrecedence.
///
/// \param LHS           The already-parsed left operand.
/// \param MinPrecedence Operators with a lower precedence are left in the
///                      stream for the caller.
/// \returns The resulting expression tree (\p LHS itself when no operator
///          qualifies).
Expression* Parser::parseInfixOperatorAfterExpression(Expression* LHS, int MinPrecedence) {
  for (;;) {
    auto T0 = Tokens.peek();
    auto Info0 = ExprOperators.getInfix(T0);
    if (!Info0 || Info0->Precedence < MinPrecedence) {
      break;
    }
    Tokens.get();
    auto RHS = parseUnaryExpression();
    for (;;) {
      auto T1 = Tokens.peek();
      auto Info1 = ExprOperators.getInfix(T1);
      if (!Info1) {
        break;
      }
      // The next operator may only capture RHS when it binds tighter than
      // T0's operator, or binds equally and is right-associative. Anything
      // else (lower precedence, or equal and left-associative) must be
      // applied to the whole `LHS T0 RHS` by the enclosing loop instead.
      const bool BindsTighter = Info1->Precedence > Info0->Precedence;
      const bool SameLevelRightAssoc =
        Info1->Precedence == Info0->Precedence && Info1->isRightAssoc();
      if (!BindsTighter && !SameLevelRightAssoc) {
        break;
      }
      RHS = parseInfixOperatorAfterExpression(RHS, Info1->Precedence);
    }
    LHS = new InfixExpression(LHS, T0, RHS);
  }
  return LHS;
}

/// Parse a full expression: a unary expression followed by any infix
/// operators of precedence 0 or higher.
Expression* Parser::parseExpression() {
  return parseInfixOperatorAfterExpression(parseUnaryExpression(), 0);
}

/// Parse an expression followed by a LineFoldEnd token.
///
/// \throws UnexpectedTokenDiagnostic when the expression is not followed by
///         LineFoldEnd.
ExpressionStatement* Parser::parseExpressionStatement() {
  auto E = parseExpression();
  expectToken(NodeType::LineFoldEnd);
  return new ExpressionStatement(E);
}

/// Parse `return`, an optional expression, and a LineFoldEnd token.
///
/// The statement's expression is nullptr when LineFoldEnd directly follows
/// the keyword.
ReturnStatement* Parser::parseReturnStatement() {
  auto Keyword = static_cast<ReturnKeyword*>(expectToken(NodeType::ReturnKeyword));
  Expression* Value = nullptr;
  if (Tokens.peek()->Type != NodeType::LineFoldEnd) {
    Value = parseExpression();
  }
  expectToken(NodeType::LineFoldEnd);
  return new ReturnStatement(Keyword, Value);
}

/// Parse an `if` statement with an optional `else` part.
///
/// Each part is: keyword, test (nullptr for `else`), BlockStart, body elements
/// up to BlockEnd, then a LineFoldEnd token.
IfStatement* Parser::parseIfStatement() {

  // Collect let-body elements up to the closing BlockEnd, which is taken
  // from the stream as well.
  auto ParseBlockElements = [this]() {
    std::vector<Node*> Elements;
    for (;;) {
      if (Tokens.peek()->Type == NodeType::BlockEnd) {
        Tokens.get();
        break;
      }
      Elements.push_back(parseLetBodyElement());
    }
    return Elements;
  };

  std::vector<IfStatementPart*> Parts;

  auto IfKw = expectToken(NodeType::IfKeyword);
  auto Test = parseExpression();
  auto ThenStart = static_cast<BlockStart*>(expectToken(NodeType::BlockStart));
  auto Then = ParseBlockElements();
  Parts.push_back(new IfStatementPart(IfKw, Test, ThenStart, Then));
  expectToken(NodeType::LineFoldEnd);

  auto ElseKw = Tokens.peek();
  if (ElseKw->Type == NodeType::ElseKeyword) {
    Tokens.get();
    auto ElseStart = static_cast<BlockStart*>(expectToken(NodeType::BlockStart));
    auto Else = ParseBlockElements();
    Parts.push_back(new IfStatementPart(ElseKw, nullptr, ElseStart, Else));
    expectToken(NodeType::LineFoldEnd);
  }

  return new IfStatement(Parts);
}

/// Parse a `let` declaration:
///
///   [pub] let [mut] <pattern> <param-pattern>* [: <type>] [<body>] LineFoldEnd
///
/// where the body is either a block (BlockStart ... BlockEnd) or `=` followed
/// by an expression. Without a body the declaration's body is nullptr.
///
/// \throws UnexpectedTokenDiagnostic on any token that does not fit the shape
///         above.
LetDeclaration* Parser::parseLetDeclaration() {

  PubKeyword* Pub = nullptr;
  LetKeyword* Let;
  MutKeyword* Mut = nullptr;

  auto T0 = Tokens.get();
  if (T0->Type == NodeType::PubKeyword) {
    Pub = static_cast<PubKeyword*>(T0);
    T0 = Tokens.get();
  }
  if (T0->Type != NodeType::LetKeyword) {
    throw UnexpectedTokenDiagnostic(File, T0, std::vector<NodeType> { NodeType::LetKeyword });
  }
  Let = static_cast<LetKeyword*>(T0);

  auto T1 = Tokens.peek();
  if (T1->Type == NodeType::MutKeyword) {
    Mut = static_cast<MutKeyword*>(T1);
    Tokens.get();
  }

  auto Patt = parsePattern();

  // Tokens that end the parameter list.
  auto EndsParams = [](NodeType Ty) {
    switch (Ty) {
      case NodeType::LineFoldEnd:
      case NodeType::BlockStart:
      case NodeType::Equals:
      case NodeType::Colon:
        return true;
      default:
        return false;
    }
  };

  std::vector<Param*> Params;
  Token* T2 = Tokens.peek();
  while (!EndsParams(T2->Type)) {
    Params.push_back(new Param(parsePattern(), nullptr));
    T2 = Tokens.peek();
  }

  TypeAssert* TA = nullptr;
  if (T2->Type == NodeType::Colon) {
    Tokens.get();
    auto TE = parseTypeExpression();
    TA = new TypeAssert(static_cast<Colon*>(T2), TE);
    T2 = Tokens.peek();
  }

  LetBody* Body = nullptr;
  switch (T2->Type) {
    case NodeType::BlockStart:
    {
      Tokens.get();
      std::vector<Node*> Elements;
      while (Tokens.peek()->Type != NodeType::BlockEnd) {
        Elements.push_back(parseLetBodyElement());
      }
      Tokens.get(); // the BlockEnd peeked above
      Body = new LetBlockBody(static_cast<BlockStart*>(T2), Elements);
      break;
    }
    case NodeType::Equals:
      Tokens.get();
      Body = new LetExprBody(static_cast<Equals*>(T2), parseExpression());
      break;
    case NodeType::LineFoldEnd:
      // Declaration without a body; the LineFoldEnd is taken below.
      break;
    default:
    {
      std::vector<NodeType> Expected { NodeType::BlockStart, NodeType::LineFoldEnd, NodeType::Equals };
      if (TA == nullptr) {
        // First tokens of TypeAssert
        Expected.push_back(NodeType::Colon);
        // First tokens of Pattern
        Expected.push_back(NodeType::Identifier);
      }
      throw UnexpectedTokenDiagnostic(File, T2, Expected);
    }
  }

  expectToken(NodeType::LineFoldEnd);

  return new LetDeclaration(
    Pub,
    Let,
    Mut,
    Patt,
    Params,
    TA,
    Body
  );
}

/// Parse one element of a block body, dispatching on the first token after
/// any `pub` / `mut` modifiers: `let`, `return`, `if`, or otherwise an
/// expression statement.
Node* Parser::parseLetBodyElement() {
  auto T0 = peekFirstTokenAfterModifiers();
  switch (T0->Type) {
    case NodeType::LetKeyword:
      return parseLetDeclaration();
    case NodeType::ReturnKeyword:
      return parseReturnStatement();
    case NodeType::IfKeyword:
      return parseIfStatement();
    default:
      return parseExpressionStatement();
  }
}

/// Parse one top-level element, dispatching on the first token after any
/// `pub` / `mut` modifiers: `let`, `if`, or otherwise an expression
/// statement. Unlike block bodies, `return` gets no dedicated case here.
Node* Parser::parseSourceElement() {
  auto T0 = peekFirstTokenAfterModifiers();
  switch (T0->Type) {
    case NodeType::LetKeyword:
      return parseLetDeclaration();
    case NodeType::IfKeyword:
      return parseIfStatement();
    default:
      return parseExpressionStatement();
  }
}

/// Parse top-level elements until an EndOfFile token is peeked and wrap them
/// in a SourceFile node.
SourceFile* Parser::parseSourceFile() {
  std::vector<Node*> Elements;
  while (Tokens.peek()->Type != NodeType::EndOfFile) {
    Elements.push_back(parseSourceElement());
  }
  return new SourceFile(File, Elements);
}

}