Make parser more forgiving

This commit is contained in:
Sam Vervaeck 2023-05-26 19:51:12 +02:00
parent 4acb76159f
commit 9afdd1e607
Signed by: samvv
SSH key fingerprint: SHA256:dIg0ywU1OP+ZYifrYxy8c5esO72cIKB+4/9wkZj1VaY
6 changed files with 543 additions and 129 deletions

View file

@ -31,6 +31,10 @@ namespace bolt {
size_t Line = 1;
size_t Column = 1;
inline bool isEmpty() const noexcept {
return Line == 0 && Column == 0;
}
inline void advance(const ByteString& Text) {
for (auto Chr: Text) {
if (Chr == '\n') {
@ -48,6 +52,10 @@ namespace bolt {
return Out;
}
static TextLoc empty() {
return TextLoc { 0, 0 };
}
};
struct TextRange {
@ -820,7 +828,7 @@ namespace bolt {
ByteString Text;
Identifier(ByteString Text, TextLoc StartLoc):
Identifier(ByteString Text, TextLoc StartLoc = TextLoc::empty()):
Symbol(NodeKind::Identifier, StartLoc), Text(Text) {}
ByteString getCanonicalText() override;
@ -1794,6 +1802,7 @@ namespace bolt {
template<> inline NodeKind getNodeType<InstanceKeyword>() { return NodeKind::InstanceKeyword; }
template<> inline NodeKind getNodeType<ElifKeyword>() { return NodeKind::ElifKeyword; }
template<> inline NodeKind getNodeType<IfKeyword>() { return NodeKind::IfKeyword; }
template<> inline NodeKind getNodeType<MatchKeyword>() { return NodeKind::MatchKeyword; }
template<> inline NodeKind getNodeType<ElseKeyword>() { return NodeKind::ElseKeyword; }
template<> inline NodeKind getNodeType<Invalid>() { return NodeKind::Invalid; }
template<> inline NodeKind getNodeType<EndOfFile>() { return NodeKind::EndOfFile; }

View file

@ -80,6 +80,7 @@ namespace bolt {
Expression* parseInfixOperatorAfterExpression(Expression* LHS, int MinPrecedence);
MatchExpression* parseMatchExpression();
Expression* parseMemberExpression();
Expression* parsePrimitiveExpression();
@ -89,6 +90,10 @@ namespace bolt {
TypeExpression* parseQualifiedTypeExpression();
TypeExpression* parseArrowTypeExpression();
VarTypeExpression* parseVarTypeExpression();
ReferenceTypeExpression* parseReferenceTypeExpression();
void checkLineFoldEnd();
void skipToLineFoldEnd();
public:

View file

@ -6,9 +6,9 @@
#include <deque>
#include <stack>
#include "bolt/Text.hpp"
#include "bolt/String.hpp"
#include "bolt/Stream.hpp"
#include "bolt/CST.hpp"
namespace bolt {

View file

@ -1,4 +1,6 @@
// TODO make a more elegant mechanism to ref() and unref() items in a buffered stream
#pragma once
#include <cstddef>
@ -60,13 +62,16 @@ namespace bolt {
} else {
auto Keep = Buffer.front();
Buffer.pop_front();
Keep->unref();
return Keep;
}
}
value_type peek(std::size_t Offset = 0) override {
while (Buffer.size() <= Offset) {
Buffer.push_back(read());
auto Item = read();
Item->ref();
Buffer.push_back(Item);
}
return Buffer[Offset];
}

View file

@ -1,4 +1,6 @@
// TODO check for memory leaks everywhere a nullptr is returned
#include <exception>
#include <vector>
@ -8,6 +10,7 @@
#include "bolt/Scanner.hpp"
#include "bolt/Parser.hpp"
#include "bolt/Diagnostics.hpp"
#include "bolt/DiagnosticEngine.hpp"
namespace bolt {
@ -79,10 +82,12 @@ namespace bolt {
}
Token* Parser::expectToken(NodeKind Kind) {
auto T = Tokens.get();
auto T = Tokens.peek();
if (T->getKind() != Kind) {
throw UnexpectedTokenDiagnostic(File, T, std::vector<NodeKind> { Kind }); \
DE.add<UnexpectedTokenDiagnostic>(File, T, std::vector<NodeKind> { Kind });
return nullptr;
}
Tokens.get();
return T;
}
@ -97,7 +102,9 @@ namespace bolt {
Tokens.get();
return new BindPattern(static_cast<Identifier*>(T0));
default:
throw UnexpectedTokenDiagnostic(File, T0, std::vector { NodeKind::Identifier, NodeKind::StringLiteral, NodeKind::IntegerLiteral });
Tokens.get();
DE.add<UnexpectedTokenDiagnostic>(File, T0, std::vector { NodeKind::Identifier, NodeKind::StringLiteral, NodeKind::IntegerLiteral });
return nullptr;
}
}
@ -115,18 +122,18 @@ namespace bolt {
switch (T0->getKind()) {
case NodeKind::RArrowAlt:
HasConstraints = true;
goto after_scan;
goto after_lookahead;
case NodeKind::Equals:
case NodeKind::BlockStart:
case NodeKind::LineFoldEnd:
case NodeKind::EndOfFile:
goto after_scan;
goto after_lookahead;
default:
break;
}
}
}
after_scan:
after_lookahead:
if (!HasConstraints) {
return parseArrowTypeExpression();
}
@ -135,36 +142,58 @@ after_scan:
std::vector<std::tuple<ConstraintExpression*, Comma*>> Constraints;
RParen* RParen;
RArrowAlt* RArrowAlt;
auto T1 = Tokens.peek();
if (T1->getKind() == NodeKind::RParen) {
Tokens.get();
RParen = static_cast<class RParen*>(T1);
goto after_constraints;
}
for (;;) {
ConstraintExpression* C;
auto T0 = Tokens.peek();
switch (T0->getKind()) {
case NodeKind::RParen:
Tokens.get();
RParen = static_cast<class RParen*>(T0);
RArrowAlt = expectToken<class RArrowAlt>();
goto after_constraints;
default:
C = parseConstraintExpression();
break;
}
auto C = parseConstraintExpression();
Comma* Comma = nullptr;
auto T1 = Tokens.get();
switch (T1->getKind()) {
auto T2 = Tokens.get();
switch (T2->getKind()) {
case NodeKind::Comma:
Constraints.push_back(std::make_tuple(C, static_cast<class Comma*>(T1)));
{
auto Comma = static_cast<class Comma*>(T2);
if (C) {
Constraints.push_back(std::make_tuple(C, Comma));
} else {
Comma->unref();
}
continue;
}
case NodeKind::RParen:
RArrowAlt = static_cast<class RArrowAlt*>(T1);
Constraints.push_back(std::make_tuple(C, nullptr));
RArrowAlt = expectToken<class RArrowAlt>();
RParen = static_cast<class RParen*>(T2);
if (C) {
Constraints.push_back(std::make_tuple(C, nullptr));
}
goto after_constraints;
default:
throw UnexpectedTokenDiagnostic(File, T1, std::vector { NodeKind::Comma, NodeKind::RArrowAlt });
DE.add<UnexpectedTokenDiagnostic>(File, T2, std::vector { NodeKind::Comma, NodeKind::RArrowAlt });
return nullptr;
}
}
after_constraints:
RArrowAlt = expectToken<class RArrowAlt>();
if (!RArrowAlt) {
LParen->unref();
for (auto [CE, Comma]: Constraints) {
CE->unref();
}
RParen->unref();
return nullptr;
}
auto TE = parseArrowTypeExpression();
if (!TE) {
LParen->unref();
for (auto [CE, Comma]: Constraints) {
CE->unref();
}
RParen->unref();
RArrowAlt->unref();
return nullptr;
}
return new QualifiedTypeExpression(Constraints, RArrowAlt, TE);
}
@ -187,6 +216,14 @@ after_constraints:
break;
}
auto TE = parseTypeExpression();
if (!TE) {
LParen->unref();
for (auto [TE, Comma]: Elements) {
TE->unref();
Comma->unref();
}
return nullptr;
}
auto T2 = Tokens.get();
switch (T2->getKind()) {
case NodeKind::RParen:
@ -197,7 +234,13 @@ after_constraints:
Elements.push_back({ TE, static_cast<Comma*>(T2) });
continue;
default:
throw UnexpectedTokenDiagnostic(File, T2, { NodeKind::Comma, NodeKind::RParen });
DE.add<UnexpectedTokenDiagnostic>(File, T2, std::vector { NodeKind::Comma, NodeKind::RParen });
LParen->unref();
for (auto [TE, Comma]: Elements) {
TE->unref();
Comma->unref();
}
return nullptr;
}
}
after_tuple_element:
@ -207,25 +250,39 @@ after_tuple_element:
return new TupleTypeExpression { LParen, Elements, RParen };
}
case NodeKind::IdentifierAlt:
{
std::vector<std::tuple<IdentifierAlt*, Dot*>> ModulePath;
auto Name = expectToken<IdentifierAlt>();
for (;;) {
auto T1 = Tokens.peek();
if (T1->getKind() != NodeKind::Dot) {
break;
}
Tokens.get();
ModulePath.push_back(std::make_tuple(static_cast<IdentifierAlt*>(Name), static_cast<Dot*>(T1)));
Name = expectToken<IdentifierAlt>();
}
return new ReferenceTypeExpression(ModulePath, static_cast<IdentifierAlt*>(Name));
}
return parseReferenceTypeExpression();
default:
throw UnexpectedTokenDiagnostic(File, T0, std::vector { NodeKind::Identifier });
Tokens.get();
DE.add<UnexpectedTokenDiagnostic>(File, T0, std::vector { NodeKind::Identifier, NodeKind::IdentifierAlt, NodeKind::LParen });
return nullptr;
}
}
ReferenceTypeExpression* Parser::parseReferenceTypeExpression() {
std::vector<std::tuple<IdentifierAlt*, Dot*>> ModulePath;
auto Name = expectToken<IdentifierAlt>();
if (!Name) {
return nullptr;
}
for (;;) {
auto T1 = Tokens.peek();
if (T1->getKind() != NodeKind::Dot) {
break;
}
Tokens.get();
ModulePath.push_back(std::make_tuple(static_cast<IdentifierAlt*>(Name), static_cast<Dot*>(T1)));
Name = expectToken<IdentifierAlt>();
if (!Name) {
for (auto [Name, Dot]: ModulePath) {
Name->unref();
Dot->unref();
}
return nullptr;
}
}
return new ReferenceTypeExpression(ModulePath, static_cast<IdentifierAlt*>(Name));
}
TypeExpression* Parser::parseArrowTypeExpression() {
auto RetType = parsePrimitiveTypeExpression();
std::vector<TypeExpression*> ParamTypes;
@ -237,6 +294,12 @@ after_tuple_element:
Tokens.get();
ParamTypes.push_back(RetType);
RetType = parsePrimitiveTypeExpression();
if (!RetType) {
for (auto ParamType: ParamTypes) {
ParamType->unref();
}
return nullptr;
}
}
if (!ParamTypes.empty()) {
return new ArrowTypeExpression(ParamTypes, RetType);
@ -244,6 +307,81 @@ after_tuple_element:
return RetType;
}
MatchExpression* Parser::parseMatchExpression() {
auto T0 = expectToken<MatchKeyword>();
if (!T0) {
return nullptr;
}
Tokens.get();
auto T1 = Tokens.peek();
Expression* Value;
BlockStart* BlockStart;
if (llvm::isa<class BlockStart>(T1)) {
Value = nullptr;
BlockStart = static_cast<class BlockStart*>(T1);
Tokens.get();
} else {
Value = parseExpression();
if (!Value) {
T0->unref();
return nullptr;
}
BlockStart = expectToken<class BlockStart>();
if (!BlockStart) {
T0->unref();
Value->unref();
return nullptr;
}
}
std::vector<MatchCase*> Cases;
for (;;) {
auto T2 = Tokens.peek();
if (llvm::isa<BlockEnd>(T2)) {
Tokens.get();
break;
}
auto Pattern = parsePattern();
if (!Pattern) {
T0->unref();
Value->unref();
BlockStart->unref();
for (auto Case: Cases) {
Case->unref();
}
skipToLineFoldEnd();
continue;
}
auto RArrowAlt = expectToken<class RArrowAlt>();
if (!RArrowAlt) {
T0->unref();
Value->unref();
BlockStart->unref();
for (auto Case: Cases) {
Case->unref();
}
Pattern->unref();
skipToLineFoldEnd();
continue;
}
auto Expression = parseExpression();
if (!Expression) {
T0->unref();
Value->unref();
BlockStart->unref();
for (auto Case: Cases) {
Case->unref();
}
Pattern->unref();
RArrowAlt->unref();
skipToLineFoldEnd();
continue;
}
checkLineFoldEnd();
Cases.push_back(new MatchCase { Pattern, RArrowAlt, Expression });
}
return new MatchExpression(static_cast<MatchKeyword*>(T0), Value, BlockStart, Cases);
}
Expression* Parser::parsePrimitiveExpression() {
auto T0 = Tokens.peek();
switch (T0->getKind()) {
@ -258,12 +396,17 @@ after_tuple_element:
break;
}
Tokens.get();
auto Dot = expectToken<class Dot>();
ModulePath.push_back(std::make_tuple(static_cast<IdentifierAlt*>(T1), Dot));
Tokens.get();
ModulePath.push_back(std::make_tuple(static_cast<IdentifierAlt*>(T1), static_cast<class Dot*>(T2)));
}
auto T3 = Tokens.get();
if (!llvm::isa<Symbol>(T3)) {
throw UnexpectedTokenDiagnostic(File, T3, { NodeKind::Identifier, NodeKind::IdentifierAlt });
for (auto [Name, Dot]: ModulePath) {
Name->unref();
Dot->unref();
}
DE.add<UnexpectedTokenDiagnostic>(File, T3, std::vector { NodeKind::Identifier, NodeKind::IdentifierAlt });
return nullptr;
}
return new ReferenceExpression(ModulePath, static_cast<Symbol*>(T3));
}
@ -273,73 +416,73 @@ after_tuple_element:
std::vector<std::tuple<Expression*, Comma*>> Elements;
auto LParen = static_cast<class LParen*>(T0);
RParen* RParen;
auto T1 = Tokens.peek();
if (llvm::isa<class RParen>(T1)) {
Tokens.get();
RParen = static_cast<class RParen*>(T1);
goto after_tuple_elements;
}
for (;;) {
auto T1 = Tokens.peek();
if (llvm::isa<class RParen>(T1)) {
Tokens.get();
RParen = static_cast<class RParen*>(T1);
break;
}
auto E = parseExpression();
if (!E) {
LParen->unref();
for (auto [E, Comma]: Elements) {
E->unref();
Comma->unref();
}
return nullptr;
}
auto T2 = Tokens.get();
switch (T2->getKind()) {
case NodeKind::RParen:
RParen = static_cast<class RParen*>(T2);
Elements.push_back({ E, nullptr });
goto finish;
goto after_tuple_elements;
case NodeKind::Comma:
Elements.push_back({ E, static_cast<class Comma*>(T2) });
break;
default:
throw UnexpectedTokenDiagnostic(File, T2, { NodeKind::RParen, NodeKind::Comma });
DE.add<UnexpectedTokenDiagnostic>(File, T2, std::vector { NodeKind::RParen, NodeKind::Comma });
LParen->unref();
for (auto [E, Comma]: Elements) {
E->unref();
Comma->unref();
}
return nullptr;
case NodeKind::LineFoldEnd:
case NodeKind::BlockStart:
case NodeKind::EndOfFile:
// Can recover from this one
RParen = nullptr;
DE.add<UnexpectedTokenDiagnostic>(File, T2, std::vector { NodeKind::RParen, NodeKind::Comma });
goto after_tuple_elements;
}
}
finish:
after_tuple_elements:
if (Elements.size() == 1 && !std::get<1>(Elements.front())) {
return new NestedExpression(LParen, std::get<0>(Elements.front()), RParen);
}
return new TupleExpression { LParen, Elements, RParen };
}
case NodeKind::MatchKeyword:
{
Tokens.get();
auto T1 = Tokens.peek();
Expression* Value;
BlockStart* BlockStart;
if (llvm::isa<class BlockStart>(T1)) {
Value = nullptr;
BlockStart = static_cast<class BlockStart*>(T1);
Tokens.get();
} else {
Value = parseExpression();
BlockStart = expectToken<class BlockStart>();
}
std::vector<MatchCase*> Cases;
for (;;) {
auto T2 = Tokens.peek();
if (llvm::isa<BlockEnd>(T2)) {
Tokens.get();
break;
}
auto Pattern = parsePattern();
auto RArrowAlt = expectToken<class RArrowAlt>();
auto Expression = parseExpression();
expectToken<LineFoldEnd>();
Cases.push_back(new MatchCase { Pattern, RArrowAlt, Expression });
}
return new MatchExpression(static_cast<MatchKeyword*>(T0), Value, BlockStart, Cases);
}
return parseMatchExpression();
case NodeKind::IntegerLiteral:
case NodeKind::StringLiteral:
Tokens.get();
return new ConstantExpression(static_cast<Literal*>(T0));
default:
throw UnexpectedTokenDiagnostic(File, T0, { NodeKind::MatchKeyword, NodeKind::Identifier, NodeKind::IdentifierAlt, NodeKind::IntegerLiteral, NodeKind::StringLiteral });
Tokens.get();
DE.add<UnexpectedTokenDiagnostic>(File, T0, std::vector { NodeKind::MatchKeyword, NodeKind::Identifier, NodeKind::IdentifierAlt, NodeKind::LParen, NodeKind::IntegerLiteral, NodeKind::StringLiteral });
return nullptr;
}
}
Expression* Parser::parseMemberExpression() {
auto E = parsePrimitiveExpression();
if (!E) {
return nullptr;
}
for (;;) {
auto T1 = Tokens.peek(0);
auto T2 = Tokens.peek(1);
@ -363,13 +506,24 @@ finish:
Expression* Parser::parseCallExpression() {
auto Operator = parseMemberExpression();
if (!Operator) {
return nullptr;
}
std::vector<Expression*> Args;
for (;;) {
auto T1 = Tokens.peek();
if (T1->getKind() == NodeKind::LineFoldEnd || T1->getKind() == NodeKind::RParen || T1->getKind() == NodeKind::BlockStart || T1->getKind() == NodeKind::Comma || ExprOperators.isInfix(T1)) {
break;
}
Args.push_back(parsePrimitiveExpression());
auto Arg = parsePrimitiveExpression();
if (!Arg) {
Operator->unref();
for (auto Arg: Args) {
Arg->unref();
}
return nullptr;
}
Args.push_back(Arg);
}
if (Args.empty()) {
return Operator;
@ -388,13 +542,19 @@ finish:
Prefix.push_back(T0);
}
auto E = parseCallExpression();
if (!E) {
for (auto Tok: Prefix) {
Tok->unref();
}
return nullptr;
}
for (auto Iter = Prefix.rbegin(); Iter != Prefix.rend(); Iter++) {
E = new PrefixExpression(*Iter, E);
}
return E;
}
Expression* Parser::parseInfixOperatorAfterExpression(Expression* LHS, int MinPrecedence) {
Expression* Parser::parseInfixOperatorAfterExpression(Expression* Left, int MinPrecedence) {
for (;;) {
auto T0 = Tokens.peek();
auto Info0 = ExprOperators.getInfix(T0);
@ -402,46 +562,88 @@ finish:
break;
}
Tokens.get();
auto RHS = parseUnaryExpression();
auto Right = parseUnaryExpression();
if (!Right) {
Left->unref();
T0->unref();
return nullptr;
}
for (;;) {
auto T1 = Tokens.peek();
auto Info1 = ExprOperators.getInfix(T1);
if (!Info1 || Info1->Precedence < Info0->Precedence && (Info1->Precedence > Info0->Precedence || Info1->isRightAssoc())) {
break;
}
RHS = parseInfixOperatorAfterExpression(RHS, Info1->Precedence);
auto NewRight = parseInfixOperatorAfterExpression(Right, Info1->Precedence);
if (!NewRight) {
Left->unref();
T0->unref();
Right->unref();
return nullptr;
}
Right = NewRight;
}
LHS = new InfixExpression(LHS, T0, RHS);
Left = new InfixExpression(Left, T0, Right);
}
return LHS;
return Left;
}
Expression* Parser::parseExpression() {
return parseInfixOperatorAfterExpression(parseUnaryExpression(), 0);
auto Left = parseUnaryExpression();
if (!Left) {
return nullptr;
}
return parseInfixOperatorAfterExpression(Left, 0);
}
ExpressionStatement* Parser::parseExpressionStatement() {
auto E = parseExpression();
BOLT_EXPECT_TOKEN(LineFoldEnd);
if (!E) {
skipToLineFoldEnd();
return nullptr;
}
checkLineFoldEnd();
return new ExpressionStatement(E);
}
ReturnStatement* Parser::parseReturnStatement() {
auto T0 = static_cast<ReturnKeyword*>(expectToken(NodeKind::ReturnKeyword));
Expression* Expression = nullptr;
auto T1 = Tokens.peek();
if (T1->getKind() != NodeKind::LineFoldEnd) {
Expression = parseExpression();
auto ReturnKeyword = expectToken<class ReturnKeyword>();
if (!ReturnKeyword) {
return nullptr;
}
BOLT_EXPECT_TOKEN(LineFoldEnd);
return new ReturnStatement(static_cast<ReturnKeyword*>(T0), Expression);
Expression* Expression;
auto T1 = Tokens.peek();
if (T1->getKind() == NodeKind::LineFoldEnd) {
Tokens.get()->unref();
Expression = nullptr;
} else {
Expression = parseExpression();
if (!Expression) {
ReturnKeyword->unref();
skipToLineFoldEnd();
return nullptr;
}
checkLineFoldEnd();
}
return new ReturnStatement(ReturnKeyword, Expression);
}
IfStatement* Parser::parseIfStatement() {
std::vector<IfStatementPart*> Parts;
auto T0 = expectToken(NodeKind::IfKeyword);
auto IfKeyword = expectToken<class IfKeyword>();
auto Test = parseExpression();
auto T1 = static_cast<BlockStart*>(expectToken(NodeKind::BlockStart));
if (!Test) {
IfKeyword->unref();
skipToLineFoldEnd();
return nullptr;
}
auto T1 = expectToken<BlockStart>();
if (!T1) {
IfKeyword->unref();
Test->unref();
skipToLineFoldEnd();
return nullptr;
}
std::vector<Node*> Then;
for (;;) {
auto T2 = Tokens.peek();
@ -449,14 +651,23 @@ finish:
Tokens.get();
break;
}
Then.push_back(parseLetBodyElement());
auto Element = parseLetBodyElement();
if (Element) {
Then.push_back(Element);
}
}
Parts.push_back(new IfStatementPart(T0, Test, T1, Then));
BOLT_EXPECT_TOKEN(LineFoldEnd)
Tokens.get(); // Always a LineFoldEnd
Parts.push_back(new IfStatementPart(IfKeyword, Test, T1, Then));
auto T3 = Tokens.peek();
if (T3->getKind() == NodeKind::ElseKeyword) {
Tokens.get();
auto T4 = static_cast<BlockStart*>(expectToken(NodeKind::BlockStart));
auto T4 = expectToken<BlockStart>();
if (!T4) {
for (auto Part: Parts) {
Part->unref();
}
return nullptr;
}
std::vector<Node*> Else;
for (;;) {
auto T5 = Tokens.peek();
@ -464,10 +675,13 @@ finish:
Tokens.get();
break;
}
Else.push_back(parseLetBodyElement());
auto Element = parseLetBodyElement();
if (Element) {
Else.push_back(Element);
}
}
Tokens.get(); // Always a LineFoldEnd
Parts.push_back(new IfStatementPart(T3, nullptr, T4, Else));
BOLT_EXPECT_TOKEN(LineFoldEnd)
}
return new IfStatement(Parts);
}
@ -477,13 +691,21 @@ finish:
PubKeyword* Pub = nullptr;
LetKeyword* Let;
MutKeyword* Mut = nullptr;
TypeAssert* TA = nullptr;
LetBody* Body = nullptr;
auto T0 = Tokens.get();
if (T0->getKind() == NodeKind::PubKeyword) {
Pub = static_cast<PubKeyword*>(T0);
T0 = Tokens.get();
}
if (T0->getKind() != NodeKind::LetKeyword) {
throw UnexpectedTokenDiagnostic(File, T0, std::vector { NodeKind::LetKeyword });
DE.add<UnexpectedTokenDiagnostic>(File, T0, std::vector { NodeKind::LetKeyword });
if (Pub) {
Pub->unref();
}
skipToLineFoldEnd();
return nullptr;
}
Let = static_cast<LetKeyword*>(T0);
auto T1 = Tokens.peek();
@ -493,6 +715,17 @@ finish:
}
auto Patt = parsePattern();
if (!Patt) {
if (Pub) {
Pub->unref();
}
Let->unref();
if (Mut) {
Mut->unref();
}
skipToLineFoldEnd();
return nullptr;
}
std::vector<Parameter*> Params;
Token* T2;
@ -505,21 +738,28 @@ finish:
case NodeKind::Colon:
goto after_params;
default:
Params.push_back(new Parameter(parsePattern(), nullptr));
auto P = parsePattern();
if (P == nullptr) {
P = new BindPattern(new Identifier("_"));
}
Params.push_back(new Parameter(P, nullptr));
}
}
after_params:
TypeAssert* TA = nullptr;
if (T2->getKind() == NodeKind::Colon) {
Tokens.get();
auto TE = parseTypeExpression();
TA = new TypeAssert(static_cast<Colon*>(T2), TE);
if (TE) {
TA = new TypeAssert(static_cast<Colon*>(T2), TE);
} else {
skipToLineFoldEnd();
goto finish;
}
T2 = Tokens.peek();
}
LetBody* Body;
switch (T2->getKind()) {
case NodeKind::BlockStart:
{
@ -530,18 +770,29 @@ after_params:
if (T3->getKind() == NodeKind::BlockEnd) {
break;
}
Elements.push_back(parseLetBodyElement());
auto Element = parseLetBodyElement();
if (Element) {
Elements.push_back(Element);
}
}
Tokens.get();
Body = new LetBlockBody(static_cast<BlockStart*>(T2), Elements);
break;
}
case NodeKind::Equals:
{
Tokens.get();
Body = new LetExprBody(static_cast<Equals*>(T2), parseExpression());
auto E = parseExpression();
if (E == nullptr) {
skipToLineFoldEnd();
goto finish;
}
if (E) {
Body = new LetExprBody(static_cast<Equals*>(T2), E);
}
break;
}
case NodeKind::LineFoldEnd:
Body = nullptr;
break;
default:
std::vector<NodeKind> Expected { NodeKind::BlockStart, NodeKind::LineFoldEnd, NodeKind::Equals };
@ -551,11 +802,14 @@ after_params:
// First tokens of Pattern
Expected.push_back(NodeKind::Identifier);
}
throw UnexpectedTokenDiagnostic(File, T2, Expected);
DE.add<UnexpectedTokenDiagnostic>(File, T2, Expected);
}
BOLT_EXPECT_TOKEN(LineFoldEnd);
after_body:
checkLineFoldEnd();
finish:
return new LetDeclaration(
Pub,
Let,
@ -588,24 +842,39 @@ after_params:
switch (Tok->getKind()) {
case NodeKind::Tilde:
HasTilde = true;
goto after_seek;
goto after_lookahead;
case NodeKind::RParen:
case NodeKind::Comma:
case NodeKind::RArrowAlt:
case NodeKind::EndOfFile:
goto after_seek;
goto after_lookahead;
default:
continue;
}
}
after_seek:
after_lookahead:
if (HasTilde) {
auto Left = parseArrowTypeExpression();
if (!Left) {
return nullptr;
}
auto Tilde = expectToken<class Tilde>();
if (!Tilde) {
Left->unref();
return nullptr;
}
auto Right = parseArrowTypeExpression();
if (!Right) {
Left->unref();
Tilde->unref();
return nullptr;
}
return new EqualityConstraintExpression { Left, Tilde, Right };
}
auto Name = expectToken<IdentifierAlt>();
if (!Name) {
return nullptr;
}
std::vector<VarTypeExpression*> TEs;
for (;;) {
auto T1 = Tokens.peek();
@ -619,7 +888,9 @@ after_seek:
TEs.push_back(new VarTypeExpression { static_cast<Identifier*>(T1) });
break;
default:
throw UnexpectedTokenDiagnostic(File, T1, std::vector { NodeKind::RParen, NodeKind::RArrowAlt, NodeKind::Comma, NodeKind::Identifier });
DE.add<UnexpectedTokenDiagnostic>(File, T1, std::vector { NodeKind::RParen, NodeKind::RArrowAlt, NodeKind::Comma, NodeKind::Identifier });
Name->unref();
return nullptr;
}
}
after_vars:
@ -628,22 +899,60 @@ after_vars:
VarTypeExpression* Parser::parseVarTypeExpression() {
auto Name = expectToken<Identifier>();
// TODO reject constructor symbols (starting with a capital letter)
if (!Name) {
return nullptr;
}
for (auto Ch: Name->Text) {
if (!std::islower(Ch)) {
// TODO
// DE.add<TypeVarMustContainLowercaseLettersDiagnostic>(Name);
Name->unref();
return nullptr;
}
}
return new VarTypeExpression { Name };
}
InstanceDeclaration* Parser::parseInstanceDeclaration() {
auto InstanceKeyword = expectToken<class InstanceKeyword>();
if (!InstanceKeyword) {
skipToLineFoldEnd();
return nullptr;
}
auto Name = expectToken<IdentifierAlt>();
if (!Name) {
InstanceKeyword->unref();
skipToLineFoldEnd();
return nullptr;
}
std::vector<TypeExpression*> TypeExps;
for (;;) {
auto T1 = Tokens.peek();
if (T1->is<BlockStart>()) {
break;
}
TypeExps.push_back(parseTypeExpression());
auto TE = parseTypeExpression();
if (!TE) {
InstanceKeyword->unref();
Name->unref();
for (auto TE: TypeExps) {
TE->unref();
}
skipToLineFoldEnd();
return nullptr;
}
TypeExps.push_back(TE);
}
auto BlockStart = expectToken<class BlockStart>();
if (!BlockStart) {
InstanceKeyword->unref();
Name->unref();
for (auto TE: TypeExps) {
TE->unref();
}
skipToLineFoldEnd();
return nullptr;
}
std::vector<Node*> Elements;
for (;;) {
auto T2 = Tokens.peek();
@ -651,9 +960,12 @@ after_vars:
Tokens.get();
break;
}
Elements.push_back(parseClassElement());
auto Element = parseClassElement();
if (Element) {
Elements.push_back(Element);
}
}
expectToken(NodeKind::LineFoldEnd);
checkLineFoldEnd();
return new InstanceDeclaration(
InstanceKeyword,
Name,
@ -671,16 +983,54 @@ after_vars:
PubKeyword = static_cast<class PubKeyword*>(T0);
}
auto ClassKeyword = expectToken<class ClassKeyword>();
if (!ClassKeyword) {
if (PubKeyword) {
PubKeyword->unref();
}
skipToLineFoldEnd();
return nullptr;
}
auto Name = expectToken<IdentifierAlt>();
if (!Name) {
if (PubKeyword) {
PubKeyword->unref();
}
ClassKeyword->unref();
skipToLineFoldEnd();
return nullptr;
}
std::vector<VarTypeExpression*> TypeVars;
for (;;) {
auto T2 = Tokens.peek();
if (T2->getKind() == NodeKind::BlockStart) {
break;
}
TypeVars.push_back(parseVarTypeExpression());
auto TE = parseVarTypeExpression();
if (!TE) {
if (PubKeyword) {
PubKeyword->unref();
}
ClassKeyword->unref();
for (auto TV: TypeVars) {
TV->unref();
}
skipToLineFoldEnd();
return nullptr;
}
TypeVars.push_back(TE);
}
auto BlockStart = expectToken<class BlockStart>();
if (!BlockStart) {
if (PubKeyword) {
PubKeyword->unref();
}
ClassKeyword->unref();
for (auto TV: TypeVars) {
TV->unref();
}
skipToLineFoldEnd();
return nullptr;
}
std::vector<Node*> Elements;
for (;;) {
auto T2 = Tokens.peek();
@ -688,9 +1038,12 @@ after_vars:
Tokens.get();
break;
}
Elements.push_back(parseClassElement());
auto Element = parseClassElement();
if (Element) {
Elements.push_back(Element);
}
}
expectToken(NodeKind::LineFoldEnd);
Tokens.get(); // Always a LineFoldEnd
return new ClassDeclaration(
PubKeyword,
ClassKeyword,
@ -736,10 +1089,49 @@ after_vars:
if (T0->is<EndOfFile>()) {
break;
}
Elements.push_back(parseSourceElement());
auto Element = parseSourceElement();
if (Element) {
Elements.push_back(Element);
}
}
return new SourceFile(File, Elements);
}
void Parser::skipToLineFoldEnd() {
unsigned Level = 0;
for (;;) {
auto T0 = Tokens.get();
switch (T0->getKind()) {
case NodeKind::EndOfFile:
return;
case NodeKind::LineFoldEnd:
T0->unref();
if (Level == 0) {
return;
}
break;
case NodeKind::BlockStart:
T0->unref();
Level++;
break;
case NodeKind::BlockEnd:
T0->unref();
Level--;
break;
default:
T0->unref();
break;
}
}
}
void Parser::checkLineFoldEnd() {
auto T0 = Tokens.get();
if (T0->getKind() != NodeKind::LineFoldEnd) {
DE.add<UnexpectedTokenDiagnostic>(File, T0, std::vector { NodeKind::LineFoldEnd });
skipToLineFoldEnd();
}
}
}

View file

@ -72,6 +72,7 @@ namespace bolt {
{ "match", NodeKind::MatchKeyword },
{ "class", NodeKind::ClassKeyword },
{ "instance", NodeKind::InstanceKeyword },
{ "struct", NodeKind::StructKeyword },
};
Scanner::Scanner(TextFile& File, Stream<Char>& Chars):
@ -242,6 +243,8 @@ digit_finish:
return new ClassKeyword(StartLoc);
case NodeKind::InstanceKeyword:
return new InstanceKeyword(StartLoc);
case NodeKind::StructKeyword:
return new StructKeyword(StartLoc);
default:
ZEN_UNREACHABLE
}