Make parser more forgiving
This commit is contained in:
parent
4acb76159f
commit
9afdd1e607
6 changed files with 543 additions and 129 deletions
|
@ -31,6 +31,10 @@ namespace bolt {
|
|||
size_t Line = 1;
|
||||
size_t Column = 1;
|
||||
|
||||
inline bool isEmpty() const noexcept {
|
||||
return Line == 0 && Column == 0;
|
||||
}
|
||||
|
||||
inline void advance(const ByteString& Text) {
|
||||
for (auto Chr: Text) {
|
||||
if (Chr == '\n') {
|
||||
|
@ -48,6 +52,10 @@ namespace bolt {
|
|||
return Out;
|
||||
}
|
||||
|
||||
static TextLoc empty() {
|
||||
return TextLoc { 0, 0 };
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
struct TextRange {
|
||||
|
@ -820,7 +828,7 @@ namespace bolt {
|
|||
|
||||
ByteString Text;
|
||||
|
||||
Identifier(ByteString Text, TextLoc StartLoc):
|
||||
Identifier(ByteString Text, TextLoc StartLoc = TextLoc::empty()):
|
||||
Symbol(NodeKind::Identifier, StartLoc), Text(Text) {}
|
||||
|
||||
ByteString getCanonicalText() override;
|
||||
|
@ -1794,6 +1802,7 @@ namespace bolt {
|
|||
template<> inline NodeKind getNodeType<InstanceKeyword>() { return NodeKind::InstanceKeyword; }
|
||||
template<> inline NodeKind getNodeType<ElifKeyword>() { return NodeKind::ElifKeyword; }
|
||||
template<> inline NodeKind getNodeType<IfKeyword>() { return NodeKind::IfKeyword; }
|
||||
template<> inline NodeKind getNodeType<MatchKeyword>() { return NodeKind::MatchKeyword; }
|
||||
template<> inline NodeKind getNodeType<ElseKeyword>() { return NodeKind::ElseKeyword; }
|
||||
template<> inline NodeKind getNodeType<Invalid>() { return NodeKind::Invalid; }
|
||||
template<> inline NodeKind getNodeType<EndOfFile>() { return NodeKind::EndOfFile; }
|
||||
|
|
|
@ -80,6 +80,7 @@ namespace bolt {
|
|||
|
||||
Expression* parseInfixOperatorAfterExpression(Expression* LHS, int MinPrecedence);
|
||||
|
||||
MatchExpression* parseMatchExpression();
|
||||
Expression* parseMemberExpression();
|
||||
Expression* parsePrimitiveExpression();
|
||||
|
||||
|
@ -89,6 +90,10 @@ namespace bolt {
|
|||
TypeExpression* parseQualifiedTypeExpression();
|
||||
TypeExpression* parseArrowTypeExpression();
|
||||
VarTypeExpression* parseVarTypeExpression();
|
||||
ReferenceTypeExpression* parseReferenceTypeExpression();
|
||||
|
||||
void checkLineFoldEnd();
|
||||
void skipToLineFoldEnd();
|
||||
|
||||
public:
|
||||
|
||||
|
|
|
@ -6,9 +6,9 @@
|
|||
#include <deque>
|
||||
#include <stack>
|
||||
|
||||
#include "bolt/Text.hpp"
|
||||
#include "bolt/String.hpp"
|
||||
#include "bolt/Stream.hpp"
|
||||
#include "bolt/CST.hpp"
|
||||
|
||||
namespace bolt {
|
||||
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
|
||||
// TODO make a more elegant mechanism to ref() and unref() items in a buffered stream
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
@ -60,13 +62,16 @@ namespace bolt {
|
|||
} else {
|
||||
auto Keep = Buffer.front();
|
||||
Buffer.pop_front();
|
||||
Keep->unref();
|
||||
return Keep;
|
||||
}
|
||||
}
|
||||
|
||||
value_type peek(std::size_t Offset = 0) override {
|
||||
while (Buffer.size() <= Offset) {
|
||||
Buffer.push_back(read());
|
||||
auto Item = read();
|
||||
Item->ref();
|
||||
Buffer.push_back(Item);
|
||||
}
|
||||
return Buffer[Offset];
|
||||
}
|
||||
|
|
610
src/Parser.cc
610
src/Parser.cc
|
@ -1,4 +1,6 @@
|
|||
|
||||
// TODO check for memory leaks everywhere a nullptr is returned
|
||||
|
||||
#include <exception>
|
||||
#include <vector>
|
||||
|
||||
|
@ -8,6 +10,7 @@
|
|||
#include "bolt/Scanner.hpp"
|
||||
#include "bolt/Parser.hpp"
|
||||
#include "bolt/Diagnostics.hpp"
|
||||
#include "bolt/DiagnosticEngine.hpp"
|
||||
|
||||
namespace bolt {
|
||||
|
||||
|
@ -79,10 +82,12 @@ namespace bolt {
|
|||
}
|
||||
|
||||
Token* Parser::expectToken(NodeKind Kind) {
|
||||
auto T = Tokens.get();
|
||||
auto T = Tokens.peek();
|
||||
if (T->getKind() != Kind) {
|
||||
throw UnexpectedTokenDiagnostic(File, T, std::vector<NodeKind> { Kind }); \
|
||||
DE.add<UnexpectedTokenDiagnostic>(File, T, std::vector<NodeKind> { Kind });
|
||||
return nullptr;
|
||||
}
|
||||
Tokens.get();
|
||||
return T;
|
||||
}
|
||||
|
||||
|
@ -97,7 +102,9 @@ namespace bolt {
|
|||
Tokens.get();
|
||||
return new BindPattern(static_cast<Identifier*>(T0));
|
||||
default:
|
||||
throw UnexpectedTokenDiagnostic(File, T0, std::vector { NodeKind::Identifier, NodeKind::StringLiteral, NodeKind::IntegerLiteral });
|
||||
Tokens.get();
|
||||
DE.add<UnexpectedTokenDiagnostic>(File, T0, std::vector { NodeKind::Identifier, NodeKind::StringLiteral, NodeKind::IntegerLiteral });
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -115,18 +122,18 @@ namespace bolt {
|
|||
switch (T0->getKind()) {
|
||||
case NodeKind::RArrowAlt:
|
||||
HasConstraints = true;
|
||||
goto after_scan;
|
||||
goto after_lookahead;
|
||||
case NodeKind::Equals:
|
||||
case NodeKind::BlockStart:
|
||||
case NodeKind::LineFoldEnd:
|
||||
case NodeKind::EndOfFile:
|
||||
goto after_scan;
|
||||
goto after_lookahead;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
after_scan:
|
||||
after_lookahead:
|
||||
if (!HasConstraints) {
|
||||
return parseArrowTypeExpression();
|
||||
}
|
||||
|
@ -135,36 +142,58 @@ after_scan:
|
|||
std::vector<std::tuple<ConstraintExpression*, Comma*>> Constraints;
|
||||
RParen* RParen;
|
||||
RArrowAlt* RArrowAlt;
|
||||
for (;;) {
|
||||
ConstraintExpression* C;
|
||||
auto T0 = Tokens.peek();
|
||||
switch (T0->getKind()) {
|
||||
case NodeKind::RParen:
|
||||
auto T1 = Tokens.peek();
|
||||
if (T1->getKind() == NodeKind::RParen) {
|
||||
Tokens.get();
|
||||
RParen = static_cast<class RParen*>(T0);
|
||||
RArrowAlt = expectToken<class RArrowAlt>();
|
||||
RParen = static_cast<class RParen*>(T1);
|
||||
goto after_constraints;
|
||||
default:
|
||||
C = parseConstraintExpression();
|
||||
break;
|
||||
}
|
||||
for (;;) {
|
||||
auto C = parseConstraintExpression();
|
||||
Comma* Comma = nullptr;
|
||||
auto T1 = Tokens.get();
|
||||
switch (T1->getKind()) {
|
||||
auto T2 = Tokens.get();
|
||||
switch (T2->getKind()) {
|
||||
case NodeKind::Comma:
|
||||
Constraints.push_back(std::make_tuple(C, static_cast<class Comma*>(T1)));
|
||||
{
|
||||
auto Comma = static_cast<class Comma*>(T2);
|
||||
if (C) {
|
||||
Constraints.push_back(std::make_tuple(C, Comma));
|
||||
} else {
|
||||
Comma->unref();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
case NodeKind::RParen:
|
||||
RArrowAlt = static_cast<class RArrowAlt*>(T1);
|
||||
RParen = static_cast<class RParen*>(T2);
|
||||
if (C) {
|
||||
Constraints.push_back(std::make_tuple(C, nullptr));
|
||||
RArrowAlt = expectToken<class RArrowAlt>();
|
||||
}
|
||||
goto after_constraints;
|
||||
default:
|
||||
throw UnexpectedTokenDiagnostic(File, T1, std::vector { NodeKind::Comma, NodeKind::RArrowAlt });
|
||||
DE.add<UnexpectedTokenDiagnostic>(File, T2, std::vector { NodeKind::Comma, NodeKind::RArrowAlt });
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
after_constraints:
|
||||
RArrowAlt = expectToken<class RArrowAlt>();
|
||||
if (!RArrowAlt) {
|
||||
LParen->unref();
|
||||
for (auto [CE, Comma]: Constraints) {
|
||||
CE->unref();
|
||||
}
|
||||
RParen->unref();
|
||||
return nullptr;
|
||||
}
|
||||
auto TE = parseArrowTypeExpression();
|
||||
if (!TE) {
|
||||
LParen->unref();
|
||||
for (auto [CE, Comma]: Constraints) {
|
||||
CE->unref();
|
||||
}
|
||||
RParen->unref();
|
||||
RArrowAlt->unref();
|
||||
return nullptr;
|
||||
}
|
||||
return new QualifiedTypeExpression(Constraints, RArrowAlt, TE);
|
||||
}
|
||||
|
||||
|
@ -187,6 +216,14 @@ after_constraints:
|
|||
break;
|
||||
}
|
||||
auto TE = parseTypeExpression();
|
||||
if (!TE) {
|
||||
LParen->unref();
|
||||
for (auto [TE, Comma]: Elements) {
|
||||
TE->unref();
|
||||
Comma->unref();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
auto T2 = Tokens.get();
|
||||
switch (T2->getKind()) {
|
||||
case NodeKind::RParen:
|
||||
|
@ -197,7 +234,13 @@ after_constraints:
|
|||
Elements.push_back({ TE, static_cast<Comma*>(T2) });
|
||||
continue;
|
||||
default:
|
||||
throw UnexpectedTokenDiagnostic(File, T2, { NodeKind::Comma, NodeKind::RParen });
|
||||
DE.add<UnexpectedTokenDiagnostic>(File, T2, std::vector { NodeKind::Comma, NodeKind::RParen });
|
||||
LParen->unref();
|
||||
for (auto [TE, Comma]: Elements) {
|
||||
TE->unref();
|
||||
Comma->unref();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
after_tuple_element:
|
||||
|
@ -207,9 +250,20 @@ after_tuple_element:
|
|||
return new TupleTypeExpression { LParen, Elements, RParen };
|
||||
}
|
||||
case NodeKind::IdentifierAlt:
|
||||
{
|
||||
return parseReferenceTypeExpression();
|
||||
default:
|
||||
Tokens.get();
|
||||
DE.add<UnexpectedTokenDiagnostic>(File, T0, std::vector { NodeKind::Identifier, NodeKind::IdentifierAlt, NodeKind::LParen });
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
ReferenceTypeExpression* Parser::parseReferenceTypeExpression() {
|
||||
std::vector<std::tuple<IdentifierAlt*, Dot*>> ModulePath;
|
||||
auto Name = expectToken<IdentifierAlt>();
|
||||
if (!Name) {
|
||||
return nullptr;
|
||||
}
|
||||
for (;;) {
|
||||
auto T1 = Tokens.peek();
|
||||
if (T1->getKind() != NodeKind::Dot) {
|
||||
|
@ -218,13 +272,16 @@ after_tuple_element:
|
|||
Tokens.get();
|
||||
ModulePath.push_back(std::make_tuple(static_cast<IdentifierAlt*>(Name), static_cast<Dot*>(T1)));
|
||||
Name = expectToken<IdentifierAlt>();
|
||||
if (!Name) {
|
||||
for (auto [Name, Dot]: ModulePath) {
|
||||
Name->unref();
|
||||
Dot->unref();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
return new ReferenceTypeExpression(ModulePath, static_cast<IdentifierAlt*>(Name));
|
||||
}
|
||||
default:
|
||||
throw UnexpectedTokenDiagnostic(File, T0, std::vector { NodeKind::Identifier });
|
||||
}
|
||||
}
|
||||
|
||||
TypeExpression* Parser::parseArrowTypeExpression() {
|
||||
auto RetType = parsePrimitiveTypeExpression();
|
||||
|
@ -237,6 +294,12 @@ after_tuple_element:
|
|||
Tokens.get();
|
||||
ParamTypes.push_back(RetType);
|
||||
RetType = parsePrimitiveTypeExpression();
|
||||
if (!RetType) {
|
||||
for (auto ParamType: ParamTypes) {
|
||||
ParamType->unref();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
if (!ParamTypes.empty()) {
|
||||
return new ArrowTypeExpression(ParamTypes, RetType);
|
||||
|
@ -244,6 +307,81 @@ after_tuple_element:
|
|||
return RetType;
|
||||
}
|
||||
|
||||
MatchExpression* Parser::parseMatchExpression() {
|
||||
auto T0 = expectToken<MatchKeyword>();
|
||||
if (!T0) {
|
||||
return nullptr;
|
||||
}
|
||||
Tokens.get();
|
||||
auto T1 = Tokens.peek();
|
||||
Expression* Value;
|
||||
BlockStart* BlockStart;
|
||||
if (llvm::isa<class BlockStart>(T1)) {
|
||||
Value = nullptr;
|
||||
BlockStart = static_cast<class BlockStart*>(T1);
|
||||
Tokens.get();
|
||||
} else {
|
||||
Value = parseExpression();
|
||||
if (!Value) {
|
||||
T0->unref();
|
||||
return nullptr;
|
||||
}
|
||||
BlockStart = expectToken<class BlockStart>();
|
||||
if (!BlockStart) {
|
||||
T0->unref();
|
||||
Value->unref();
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
std::vector<MatchCase*> Cases;
|
||||
for (;;) {
|
||||
auto T2 = Tokens.peek();
|
||||
if (llvm::isa<BlockEnd>(T2)) {
|
||||
Tokens.get();
|
||||
break;
|
||||
}
|
||||
auto Pattern = parsePattern();
|
||||
if (!Pattern) {
|
||||
T0->unref();
|
||||
Value->unref();
|
||||
BlockStart->unref();
|
||||
for (auto Case: Cases) {
|
||||
Case->unref();
|
||||
}
|
||||
skipToLineFoldEnd();
|
||||
continue;
|
||||
}
|
||||
auto RArrowAlt = expectToken<class RArrowAlt>();
|
||||
if (!RArrowAlt) {
|
||||
T0->unref();
|
||||
Value->unref();
|
||||
BlockStart->unref();
|
||||
for (auto Case: Cases) {
|
||||
Case->unref();
|
||||
}
|
||||
Pattern->unref();
|
||||
skipToLineFoldEnd();
|
||||
continue;
|
||||
}
|
||||
auto Expression = parseExpression();
|
||||
if (!Expression) {
|
||||
T0->unref();
|
||||
Value->unref();
|
||||
BlockStart->unref();
|
||||
for (auto Case: Cases) {
|
||||
Case->unref();
|
||||
}
|
||||
Pattern->unref();
|
||||
RArrowAlt->unref();
|
||||
skipToLineFoldEnd();
|
||||
continue;
|
||||
}
|
||||
checkLineFoldEnd();
|
||||
Cases.push_back(new MatchCase { Pattern, RArrowAlt, Expression });
|
||||
}
|
||||
return new MatchExpression(static_cast<MatchKeyword*>(T0), Value, BlockStart, Cases);
|
||||
}
|
||||
|
||||
Expression* Parser::parsePrimitiveExpression() {
|
||||
auto T0 = Tokens.peek();
|
||||
switch (T0->getKind()) {
|
||||
|
@ -258,12 +396,17 @@ after_tuple_element:
|
|||
break;
|
||||
}
|
||||
Tokens.get();
|
||||
auto Dot = expectToken<class Dot>();
|
||||
ModulePath.push_back(std::make_tuple(static_cast<IdentifierAlt*>(T1), Dot));
|
||||
Tokens.get();
|
||||
ModulePath.push_back(std::make_tuple(static_cast<IdentifierAlt*>(T1), static_cast<class Dot*>(T2)));
|
||||
}
|
||||
auto T3 = Tokens.get();
|
||||
if (!llvm::isa<Symbol>(T3)) {
|
||||
throw UnexpectedTokenDiagnostic(File, T3, { NodeKind::Identifier, NodeKind::IdentifierAlt });
|
||||
for (auto [Name, Dot]: ModulePath) {
|
||||
Name->unref();
|
||||
Dot->unref();
|
||||
}
|
||||
DE.add<UnexpectedTokenDiagnostic>(File, T3, std::vector { NodeKind::Identifier, NodeKind::IdentifierAlt });
|
||||
return nullptr;
|
||||
}
|
||||
return new ReferenceExpression(ModulePath, static_cast<Symbol*>(T3));
|
||||
}
|
||||
|
@ -273,73 +416,73 @@ after_tuple_element:
|
|||
std::vector<std::tuple<Expression*, Comma*>> Elements;
|
||||
auto LParen = static_cast<class LParen*>(T0);
|
||||
RParen* RParen;
|
||||
for (;;) {
|
||||
auto T1 = Tokens.peek();
|
||||
if (llvm::isa<class RParen>(T1)) {
|
||||
Tokens.get();
|
||||
RParen = static_cast<class RParen*>(T1);
|
||||
break;
|
||||
goto after_tuple_elements;
|
||||
}
|
||||
for (;;) {
|
||||
auto T1 = Tokens.peek();
|
||||
auto E = parseExpression();
|
||||
if (!E) {
|
||||
LParen->unref();
|
||||
for (auto [E, Comma]: Elements) {
|
||||
E->unref();
|
||||
Comma->unref();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
auto T2 = Tokens.get();
|
||||
switch (T2->getKind()) {
|
||||
case NodeKind::RParen:
|
||||
RParen = static_cast<class RParen*>(T2);
|
||||
Elements.push_back({ E, nullptr });
|
||||
goto finish;
|
||||
goto after_tuple_elements;
|
||||
case NodeKind::Comma:
|
||||
Elements.push_back({ E, static_cast<class Comma*>(T2) });
|
||||
break;
|
||||
default:
|
||||
throw UnexpectedTokenDiagnostic(File, T2, { NodeKind::RParen, NodeKind::Comma });
|
||||
DE.add<UnexpectedTokenDiagnostic>(File, T2, std::vector { NodeKind::RParen, NodeKind::Comma });
|
||||
LParen->unref();
|
||||
for (auto [E, Comma]: Elements) {
|
||||
E->unref();
|
||||
Comma->unref();
|
||||
}
|
||||
return nullptr;
|
||||
case NodeKind::LineFoldEnd:
|
||||
case NodeKind::BlockStart:
|
||||
case NodeKind::EndOfFile:
|
||||
// Can recover from this one
|
||||
RParen = nullptr;
|
||||
DE.add<UnexpectedTokenDiagnostic>(File, T2, std::vector { NodeKind::RParen, NodeKind::Comma });
|
||||
goto after_tuple_elements;
|
||||
}
|
||||
}
|
||||
finish:
|
||||
after_tuple_elements:
|
||||
if (Elements.size() == 1 && !std::get<1>(Elements.front())) {
|
||||
return new NestedExpression(LParen, std::get<0>(Elements.front()), RParen);
|
||||
}
|
||||
return new TupleExpression { LParen, Elements, RParen };
|
||||
}
|
||||
case NodeKind::MatchKeyword:
|
||||
{
|
||||
Tokens.get();
|
||||
auto T1 = Tokens.peek();
|
||||
Expression* Value;
|
||||
BlockStart* BlockStart;
|
||||
if (llvm::isa<class BlockStart>(T1)) {
|
||||
Value = nullptr;
|
||||
BlockStart = static_cast<class BlockStart*>(T1);
|
||||
Tokens.get();
|
||||
} else {
|
||||
Value = parseExpression();
|
||||
BlockStart = expectToken<class BlockStart>();
|
||||
}
|
||||
std::vector<MatchCase*> Cases;
|
||||
for (;;) {
|
||||
auto T2 = Tokens.peek();
|
||||
if (llvm::isa<BlockEnd>(T2)) {
|
||||
Tokens.get();
|
||||
break;
|
||||
}
|
||||
auto Pattern = parsePattern();
|
||||
auto RArrowAlt = expectToken<class RArrowAlt>();
|
||||
auto Expression = parseExpression();
|
||||
expectToken<LineFoldEnd>();
|
||||
Cases.push_back(new MatchCase { Pattern, RArrowAlt, Expression });
|
||||
}
|
||||
return new MatchExpression(static_cast<MatchKeyword*>(T0), Value, BlockStart, Cases);
|
||||
}
|
||||
return parseMatchExpression();
|
||||
case NodeKind::IntegerLiteral:
|
||||
case NodeKind::StringLiteral:
|
||||
Tokens.get();
|
||||
return new ConstantExpression(static_cast<Literal*>(T0));
|
||||
default:
|
||||
throw UnexpectedTokenDiagnostic(File, T0, { NodeKind::MatchKeyword, NodeKind::Identifier, NodeKind::IdentifierAlt, NodeKind::IntegerLiteral, NodeKind::StringLiteral });
|
||||
Tokens.get();
|
||||
DE.add<UnexpectedTokenDiagnostic>(File, T0, std::vector { NodeKind::MatchKeyword, NodeKind::Identifier, NodeKind::IdentifierAlt, NodeKind::LParen, NodeKind::IntegerLiteral, NodeKind::StringLiteral });
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
Expression* Parser::parseMemberExpression() {
|
||||
auto E = parsePrimitiveExpression();
|
||||
if (!E) {
|
||||
return nullptr;
|
||||
}
|
||||
for (;;) {
|
||||
auto T1 = Tokens.peek(0);
|
||||
auto T2 = Tokens.peek(1);
|
||||
|
@ -363,13 +506,24 @@ finish:
|
|||
|
||||
Expression* Parser::parseCallExpression() {
|
||||
auto Operator = parseMemberExpression();
|
||||
if (!Operator) {
|
||||
return nullptr;
|
||||
}
|
||||
std::vector<Expression*> Args;
|
||||
for (;;) {
|
||||
auto T1 = Tokens.peek();
|
||||
if (T1->getKind() == NodeKind::LineFoldEnd || T1->getKind() == NodeKind::RParen || T1->getKind() == NodeKind::BlockStart || T1->getKind() == NodeKind::Comma || ExprOperators.isInfix(T1)) {
|
||||
break;
|
||||
}
|
||||
Args.push_back(parsePrimitiveExpression());
|
||||
auto Arg = parsePrimitiveExpression();
|
||||
if (!Arg) {
|
||||
Operator->unref();
|
||||
for (auto Arg: Args) {
|
||||
Arg->unref();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
Args.push_back(Arg);
|
||||
}
|
||||
if (Args.empty()) {
|
||||
return Operator;
|
||||
|
@ -388,13 +542,19 @@ finish:
|
|||
Prefix.push_back(T0);
|
||||
}
|
||||
auto E = parseCallExpression();
|
||||
if (!E) {
|
||||
for (auto Tok: Prefix) {
|
||||
Tok->unref();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
for (auto Iter = Prefix.rbegin(); Iter != Prefix.rend(); Iter++) {
|
||||
E = new PrefixExpression(*Iter, E);
|
||||
}
|
||||
return E;
|
||||
}
|
||||
|
||||
Expression* Parser::parseInfixOperatorAfterExpression(Expression* LHS, int MinPrecedence) {
|
||||
Expression* Parser::parseInfixOperatorAfterExpression(Expression* Left, int MinPrecedence) {
|
||||
for (;;) {
|
||||
auto T0 = Tokens.peek();
|
||||
auto Info0 = ExprOperators.getInfix(T0);
|
||||
|
@ -402,46 +562,88 @@ finish:
|
|||
break;
|
||||
}
|
||||
Tokens.get();
|
||||
auto RHS = parseUnaryExpression();
|
||||
auto Right = parseUnaryExpression();
|
||||
if (!Right) {
|
||||
Left->unref();
|
||||
T0->unref();
|
||||
return nullptr;
|
||||
}
|
||||
for (;;) {
|
||||
auto T1 = Tokens.peek();
|
||||
auto Info1 = ExprOperators.getInfix(T1);
|
||||
if (!Info1 || Info1->Precedence < Info0->Precedence && (Info1->Precedence > Info0->Precedence || Info1->isRightAssoc())) {
|
||||
break;
|
||||
}
|
||||
RHS = parseInfixOperatorAfterExpression(RHS, Info1->Precedence);
|
||||
auto NewRight = parseInfixOperatorAfterExpression(Right, Info1->Precedence);
|
||||
if (!NewRight) {
|
||||
Left->unref();
|
||||
T0->unref();
|
||||
Right->unref();
|
||||
return nullptr;
|
||||
}
|
||||
LHS = new InfixExpression(LHS, T0, RHS);
|
||||
Right = NewRight;
|
||||
}
|
||||
return LHS;
|
||||
Left = new InfixExpression(Left, T0, Right);
|
||||
}
|
||||
return Left;
|
||||
}
|
||||
|
||||
Expression* Parser::parseExpression() {
|
||||
return parseInfixOperatorAfterExpression(parseUnaryExpression(), 0);
|
||||
auto Left = parseUnaryExpression();
|
||||
if (!Left) {
|
||||
return nullptr;
|
||||
}
|
||||
return parseInfixOperatorAfterExpression(Left, 0);
|
||||
}
|
||||
|
||||
ExpressionStatement* Parser::parseExpressionStatement() {
|
||||
auto E = parseExpression();
|
||||
BOLT_EXPECT_TOKEN(LineFoldEnd);
|
||||
if (!E) {
|
||||
skipToLineFoldEnd();
|
||||
return nullptr;
|
||||
}
|
||||
checkLineFoldEnd();
|
||||
return new ExpressionStatement(E);
|
||||
}
|
||||
|
||||
ReturnStatement* Parser::parseReturnStatement() {
|
||||
auto T0 = static_cast<ReturnKeyword*>(expectToken(NodeKind::ReturnKeyword));
|
||||
Expression* Expression = nullptr;
|
||||
auto T1 = Tokens.peek();
|
||||
if (T1->getKind() != NodeKind::LineFoldEnd) {
|
||||
Expression = parseExpression();
|
||||
auto ReturnKeyword = expectToken<class ReturnKeyword>();
|
||||
if (!ReturnKeyword) {
|
||||
return nullptr;
|
||||
}
|
||||
BOLT_EXPECT_TOKEN(LineFoldEnd);
|
||||
return new ReturnStatement(static_cast<ReturnKeyword*>(T0), Expression);
|
||||
Expression* Expression;
|
||||
auto T1 = Tokens.peek();
|
||||
if (T1->getKind() == NodeKind::LineFoldEnd) {
|
||||
Tokens.get()->unref();
|
||||
Expression = nullptr;
|
||||
} else {
|
||||
Expression = parseExpression();
|
||||
if (!Expression) {
|
||||
ReturnKeyword->unref();
|
||||
skipToLineFoldEnd();
|
||||
return nullptr;
|
||||
}
|
||||
checkLineFoldEnd();
|
||||
}
|
||||
return new ReturnStatement(ReturnKeyword, Expression);
|
||||
}
|
||||
|
||||
IfStatement* Parser::parseIfStatement() {
|
||||
std::vector<IfStatementPart*> Parts;
|
||||
auto T0 = expectToken(NodeKind::IfKeyword);
|
||||
auto IfKeyword = expectToken<class IfKeyword>();
|
||||
auto Test = parseExpression();
|
||||
auto T1 = static_cast<BlockStart*>(expectToken(NodeKind::BlockStart));
|
||||
if (!Test) {
|
||||
IfKeyword->unref();
|
||||
skipToLineFoldEnd();
|
||||
return nullptr;
|
||||
}
|
||||
auto T1 = expectToken<BlockStart>();
|
||||
if (!T1) {
|
||||
IfKeyword->unref();
|
||||
Test->unref();
|
||||
skipToLineFoldEnd();
|
||||
return nullptr;
|
||||
}
|
||||
std::vector<Node*> Then;
|
||||
for (;;) {
|
||||
auto T2 = Tokens.peek();
|
||||
|
@ -449,14 +651,23 @@ finish:
|
|||
Tokens.get();
|
||||
break;
|
||||
}
|
||||
Then.push_back(parseLetBodyElement());
|
||||
auto Element = parseLetBodyElement();
|
||||
if (Element) {
|
||||
Then.push_back(Element);
|
||||
}
|
||||
Parts.push_back(new IfStatementPart(T0, Test, T1, Then));
|
||||
BOLT_EXPECT_TOKEN(LineFoldEnd)
|
||||
}
|
||||
Tokens.get(); // Always a LineFoldEnd
|
||||
Parts.push_back(new IfStatementPart(IfKeyword, Test, T1, Then));
|
||||
auto T3 = Tokens.peek();
|
||||
if (T3->getKind() == NodeKind::ElseKeyword) {
|
||||
Tokens.get();
|
||||
auto T4 = static_cast<BlockStart*>(expectToken(NodeKind::BlockStart));
|
||||
auto T4 = expectToken<BlockStart>();
|
||||
if (!T4) {
|
||||
for (auto Part: Parts) {
|
||||
Part->unref();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
std::vector<Node*> Else;
|
||||
for (;;) {
|
||||
auto T5 = Tokens.peek();
|
||||
|
@ -464,10 +675,13 @@ finish:
|
|||
Tokens.get();
|
||||
break;
|
||||
}
|
||||
Else.push_back(parseLetBodyElement());
|
||||
auto Element = parseLetBodyElement();
|
||||
if (Element) {
|
||||
Else.push_back(Element);
|
||||
}
|
||||
}
|
||||
Tokens.get(); // Always a LineFoldEnd
|
||||
Parts.push_back(new IfStatementPart(T3, nullptr, T4, Else));
|
||||
BOLT_EXPECT_TOKEN(LineFoldEnd)
|
||||
}
|
||||
return new IfStatement(Parts);
|
||||
}
|
||||
|
@ -477,13 +691,21 @@ finish:
|
|||
PubKeyword* Pub = nullptr;
|
||||
LetKeyword* Let;
|
||||
MutKeyword* Mut = nullptr;
|
||||
TypeAssert* TA = nullptr;
|
||||
LetBody* Body = nullptr;
|
||||
|
||||
auto T0 = Tokens.get();
|
||||
if (T0->getKind() == NodeKind::PubKeyword) {
|
||||
Pub = static_cast<PubKeyword*>(T0);
|
||||
T0 = Tokens.get();
|
||||
}
|
||||
if (T0->getKind() != NodeKind::LetKeyword) {
|
||||
throw UnexpectedTokenDiagnostic(File, T0, std::vector { NodeKind::LetKeyword });
|
||||
DE.add<UnexpectedTokenDiagnostic>(File, T0, std::vector { NodeKind::LetKeyword });
|
||||
if (Pub) {
|
||||
Pub->unref();
|
||||
}
|
||||
skipToLineFoldEnd();
|
||||
return nullptr;
|
||||
}
|
||||
Let = static_cast<LetKeyword*>(T0);
|
||||
auto T1 = Tokens.peek();
|
||||
|
@ -493,6 +715,17 @@ finish:
|
|||
}
|
||||
|
||||
auto Patt = parsePattern();
|
||||
if (!Patt) {
|
||||
if (Pub) {
|
||||
Pub->unref();
|
||||
}
|
||||
Let->unref();
|
||||
if (Mut) {
|
||||
Mut->unref();
|
||||
}
|
||||
skipToLineFoldEnd();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::vector<Parameter*> Params;
|
||||
Token* T2;
|
||||
|
@ -505,21 +738,28 @@ finish:
|
|||
case NodeKind::Colon:
|
||||
goto after_params;
|
||||
default:
|
||||
Params.push_back(new Parameter(parsePattern(), nullptr));
|
||||
auto P = parsePattern();
|
||||
if (P == nullptr) {
|
||||
P = new BindPattern(new Identifier("_"));
|
||||
}
|
||||
Params.push_back(new Parameter(P, nullptr));
|
||||
}
|
||||
}
|
||||
|
||||
after_params:
|
||||
|
||||
TypeAssert* TA = nullptr;
|
||||
if (T2->getKind() == NodeKind::Colon) {
|
||||
Tokens.get();
|
||||
auto TE = parseTypeExpression();
|
||||
if (TE) {
|
||||
TA = new TypeAssert(static_cast<Colon*>(T2), TE);
|
||||
} else {
|
||||
skipToLineFoldEnd();
|
||||
goto finish;
|
||||
}
|
||||
T2 = Tokens.peek();
|
||||
}
|
||||
|
||||
LetBody* Body;
|
||||
switch (T2->getKind()) {
|
||||
case NodeKind::BlockStart:
|
||||
{
|
||||
|
@ -530,18 +770,29 @@ after_params:
|
|||
if (T3->getKind() == NodeKind::BlockEnd) {
|
||||
break;
|
||||
}
|
||||
Elements.push_back(parseLetBodyElement());
|
||||
auto Element = parseLetBodyElement();
|
||||
if (Element) {
|
||||
Elements.push_back(Element);
|
||||
}
|
||||
}
|
||||
Tokens.get();
|
||||
Body = new LetBlockBody(static_cast<BlockStart*>(T2), Elements);
|
||||
break;
|
||||
}
|
||||
case NodeKind::Equals:
|
||||
{
|
||||
Tokens.get();
|
||||
Body = new LetExprBody(static_cast<Equals*>(T2), parseExpression());
|
||||
auto E = parseExpression();
|
||||
if (E == nullptr) {
|
||||
skipToLineFoldEnd();
|
||||
goto finish;
|
||||
}
|
||||
if (E) {
|
||||
Body = new LetExprBody(static_cast<Equals*>(T2), E);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case NodeKind::LineFoldEnd:
|
||||
Body = nullptr;
|
||||
break;
|
||||
default:
|
||||
std::vector<NodeKind> Expected { NodeKind::BlockStart, NodeKind::LineFoldEnd, NodeKind::Equals };
|
||||
|
@ -551,11 +802,14 @@ after_params:
|
|||
// First tokens of Pattern
|
||||
Expected.push_back(NodeKind::Identifier);
|
||||
}
|
||||
throw UnexpectedTokenDiagnostic(File, T2, Expected);
|
||||
DE.add<UnexpectedTokenDiagnostic>(File, T2, Expected);
|
||||
}
|
||||
|
||||
BOLT_EXPECT_TOKEN(LineFoldEnd);
|
||||
after_body:
|
||||
|
||||
checkLineFoldEnd();
|
||||
|
||||
finish:
|
||||
return new LetDeclaration(
|
||||
Pub,
|
||||
Let,
|
||||
|
@ -588,24 +842,39 @@ after_params:
|
|||
switch (Tok->getKind()) {
|
||||
case NodeKind::Tilde:
|
||||
HasTilde = true;
|
||||
goto after_seek;
|
||||
goto after_lookahead;
|
||||
case NodeKind::RParen:
|
||||
case NodeKind::Comma:
|
||||
case NodeKind::RArrowAlt:
|
||||
case NodeKind::EndOfFile:
|
||||
goto after_seek;
|
||||
goto after_lookahead;
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
}
|
||||
after_seek:
|
||||
after_lookahead:
|
||||
if (HasTilde) {
|
||||
auto Left = parseArrowTypeExpression();
|
||||
if (!Left) {
|
||||
return nullptr;
|
||||
}
|
||||
auto Tilde = expectToken<class Tilde>();
|
||||
if (!Tilde) {
|
||||
Left->unref();
|
||||
return nullptr;
|
||||
}
|
||||
auto Right = parseArrowTypeExpression();
|
||||
if (!Right) {
|
||||
Left->unref();
|
||||
Tilde->unref();
|
||||
return nullptr;
|
||||
}
|
||||
return new EqualityConstraintExpression { Left, Tilde, Right };
|
||||
}
|
||||
auto Name = expectToken<IdentifierAlt>();
|
||||
if (!Name) {
|
||||
return nullptr;
|
||||
}
|
||||
std::vector<VarTypeExpression*> TEs;
|
||||
for (;;) {
|
||||
auto T1 = Tokens.peek();
|
||||
|
@ -619,7 +888,9 @@ after_seek:
|
|||
TEs.push_back(new VarTypeExpression { static_cast<Identifier*>(T1) });
|
||||
break;
|
||||
default:
|
||||
throw UnexpectedTokenDiagnostic(File, T1, std::vector { NodeKind::RParen, NodeKind::RArrowAlt, NodeKind::Comma, NodeKind::Identifier });
|
||||
DE.add<UnexpectedTokenDiagnostic>(File, T1, std::vector { NodeKind::RParen, NodeKind::RArrowAlt, NodeKind::Comma, NodeKind::Identifier });
|
||||
Name->unref();
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
after_vars:
|
||||
|
@ -628,22 +899,60 @@ after_vars:
|
|||
|
||||
VarTypeExpression* Parser::parseVarTypeExpression() {
|
||||
auto Name = expectToken<Identifier>();
|
||||
// TODO reject constructor symbols (starting with a capital letter)
|
||||
if (!Name) {
|
||||
return nullptr;
|
||||
}
|
||||
for (auto Ch: Name->Text) {
|
||||
if (!std::islower(Ch)) {
|
||||
// TODO
|
||||
// DE.add<TypeVarMustContainLowercaseLettersDiagnostic>(Name);
|
||||
Name->unref();
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
return new VarTypeExpression { Name };
|
||||
}
|
||||
|
||||
InstanceDeclaration* Parser::parseInstanceDeclaration() {
|
||||
auto InstanceKeyword = expectToken<class InstanceKeyword>();
|
||||
if (!InstanceKeyword) {
|
||||
skipToLineFoldEnd();
|
||||
return nullptr;
|
||||
}
|
||||
auto Name = expectToken<IdentifierAlt>();
|
||||
if (!Name) {
|
||||
InstanceKeyword->unref();
|
||||
skipToLineFoldEnd();
|
||||
return nullptr;
|
||||
}
|
||||
std::vector<TypeExpression*> TypeExps;
|
||||
for (;;) {
|
||||
auto T1 = Tokens.peek();
|
||||
if (T1->is<BlockStart>()) {
|
||||
break;
|
||||
}
|
||||
TypeExps.push_back(parseTypeExpression());
|
||||
auto TE = parseTypeExpression();
|
||||
if (!TE) {
|
||||
InstanceKeyword->unref();
|
||||
Name->unref();
|
||||
for (auto TE: TypeExps) {
|
||||
TE->unref();
|
||||
}
|
||||
skipToLineFoldEnd();
|
||||
return nullptr;
|
||||
}
|
||||
TypeExps.push_back(TE);
|
||||
}
|
||||
auto BlockStart = expectToken<class BlockStart>();
|
||||
if (!BlockStart) {
|
||||
InstanceKeyword->unref();
|
||||
Name->unref();
|
||||
for (auto TE: TypeExps) {
|
||||
TE->unref();
|
||||
}
|
||||
skipToLineFoldEnd();
|
||||
return nullptr;
|
||||
}
|
||||
std::vector<Node*> Elements;
|
||||
for (;;) {
|
||||
auto T2 = Tokens.peek();
|
||||
|
@ -651,9 +960,12 @@ after_vars:
|
|||
Tokens.get();
|
||||
break;
|
||||
}
|
||||
Elements.push_back(parseClassElement());
|
||||
auto Element = parseClassElement();
|
||||
if (Element) {
|
||||
Elements.push_back(Element);
|
||||
}
|
||||
expectToken(NodeKind::LineFoldEnd);
|
||||
}
|
||||
checkLineFoldEnd();
|
||||
return new InstanceDeclaration(
|
||||
InstanceKeyword,
|
||||
Name,
|
||||
|
@ -671,16 +983,54 @@ after_vars:
|
|||
PubKeyword = static_cast<class PubKeyword*>(T0);
|
||||
}
|
||||
auto ClassKeyword = expectToken<class ClassKeyword>();
|
||||
if (!ClassKeyword) {
|
||||
if (PubKeyword) {
|
||||
PubKeyword->unref();
|
||||
}
|
||||
skipToLineFoldEnd();
|
||||
return nullptr;
|
||||
}
|
||||
auto Name = expectToken<IdentifierAlt>();
|
||||
if (!Name) {
|
||||
if (PubKeyword) {
|
||||
PubKeyword->unref();
|
||||
}
|
||||
ClassKeyword->unref();
|
||||
skipToLineFoldEnd();
|
||||
return nullptr;
|
||||
}
|
||||
std::vector<VarTypeExpression*> TypeVars;
|
||||
for (;;) {
|
||||
auto T2 = Tokens.peek();
|
||||
if (T2->getKind() == NodeKind::BlockStart) {
|
||||
break;
|
||||
}
|
||||
TypeVars.push_back(parseVarTypeExpression());
|
||||
auto TE = parseVarTypeExpression();
|
||||
if (!TE) {
|
||||
if (PubKeyword) {
|
||||
PubKeyword->unref();
|
||||
}
|
||||
ClassKeyword->unref();
|
||||
for (auto TV: TypeVars) {
|
||||
TV->unref();
|
||||
}
|
||||
skipToLineFoldEnd();
|
||||
return nullptr;
|
||||
}
|
||||
TypeVars.push_back(TE);
|
||||
}
|
||||
auto BlockStart = expectToken<class BlockStart>();
|
||||
if (!BlockStart) {
|
||||
if (PubKeyword) {
|
||||
PubKeyword->unref();
|
||||
}
|
||||
ClassKeyword->unref();
|
||||
for (auto TV: TypeVars) {
|
||||
TV->unref();
|
||||
}
|
||||
skipToLineFoldEnd();
|
||||
return nullptr;
|
||||
}
|
||||
std::vector<Node*> Elements;
|
||||
for (;;) {
|
||||
auto T2 = Tokens.peek();
|
||||
|
@ -688,9 +1038,12 @@ after_vars:
|
|||
Tokens.get();
|
||||
break;
|
||||
}
|
||||
Elements.push_back(parseClassElement());
|
||||
auto Element = parseClassElement();
|
||||
if (Element) {
|
||||
Elements.push_back(Element);
|
||||
}
|
||||
expectToken(NodeKind::LineFoldEnd);
|
||||
}
|
||||
Tokens.get(); // Always a LineFoldEnd
|
||||
return new ClassDeclaration(
|
||||
PubKeyword,
|
||||
ClassKeyword,
|
||||
|
@ -736,10 +1089,49 @@ after_vars:
|
|||
if (T0->is<EndOfFile>()) {
|
||||
break;
|
||||
}
|
||||
Elements.push_back(parseSourceElement());
|
||||
auto Element = parseSourceElement();
|
||||
if (Element) {
|
||||
Elements.push_back(Element);
|
||||
}
|
||||
}
|
||||
return new SourceFile(File, Elements);
|
||||
}
|
||||
|
||||
void Parser::skipToLineFoldEnd() {
|
||||
unsigned Level = 0;
|
||||
for (;;) {
|
||||
auto T0 = Tokens.get();
|
||||
switch (T0->getKind()) {
|
||||
case NodeKind::EndOfFile:
|
||||
return;
|
||||
case NodeKind::LineFoldEnd:
|
||||
T0->unref();
|
||||
if (Level == 0) {
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case NodeKind::BlockStart:
|
||||
T0->unref();
|
||||
Level++;
|
||||
break;
|
||||
case NodeKind::BlockEnd:
|
||||
T0->unref();
|
||||
Level--;
|
||||
break;
|
||||
default:
|
||||
T0->unref();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Parser::checkLineFoldEnd() {
|
||||
auto T0 = Tokens.get();
|
||||
if (T0->getKind() != NodeKind::LineFoldEnd) {
|
||||
DE.add<UnexpectedTokenDiagnostic>(File, T0, std::vector { NodeKind::LineFoldEnd });
|
||||
skipToLineFoldEnd();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -72,6 +72,7 @@ namespace bolt {
|
|||
{ "match", NodeKind::MatchKeyword },
|
||||
{ "class", NodeKind::ClassKeyword },
|
||||
{ "instance", NodeKind::InstanceKeyword },
|
||||
{ "struct", NodeKind::StructKeyword },
|
||||
};
|
||||
|
||||
Scanner::Scanner(TextFile& File, Stream<Char>& Chars):
|
||||
|
@ -242,6 +243,8 @@ digit_finish:
|
|||
return new ClassKeyword(StartLoc);
|
||||
case NodeKind::InstanceKeyword:
|
||||
return new InstanceKeyword(StartLoc);
|
||||
case NodeKind::StructKeyword:
|
||||
return new StructKeyword(StartLoc);
|
||||
default:
|
||||
ZEN_UNREACHABLE
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue