Make parser more forgiving

This commit is contained in:
Sam Vervaeck 2023-05-26 19:51:12 +02:00
parent 4acb76159f
commit 9afdd1e607
Signed by: samvv
SSH key fingerprint: SHA256:dIg0ywU1OP+ZYifrYxy8c5esO72cIKB+4/9wkZj1VaY
6 changed files with 543 additions and 129 deletions

View file

@ -31,6 +31,10 @@ namespace bolt {
size_t Line = 1; size_t Line = 1;
size_t Column = 1; size_t Column = 1;
/// Reports whether this location carries the all-zero value, i.e. both
/// the line and the column are 0.
inline bool isEmpty() const noexcept {
  return Column == 0 && Line == 0;
}
inline void advance(const ByteString& Text) { inline void advance(const ByteString& Text) {
for (auto Chr: Text) { for (auto Chr: Text) {
if (Chr == '\n') { if (Chr == '\n') {
@ -48,6 +52,10 @@ namespace bolt {
return Out; return Out;
} }
/// Builds a location whose two fields are both 0.
static TextLoc empty() {
  TextLoc Nowhere { 0, 0 };
  return Nowhere;
}
}; };
struct TextRange { struct TextRange {
@ -820,7 +828,7 @@ namespace bolt {
ByteString Text; ByteString Text;
Identifier(ByteString Text, TextLoc StartLoc): Identifier(ByteString Text, TextLoc StartLoc = TextLoc::empty()):
Symbol(NodeKind::Identifier, StartLoc), Text(Text) {} Symbol(NodeKind::Identifier, StartLoc), Text(Text) {}
ByteString getCanonicalText() override; ByteString getCanonicalText() override;
@ -1794,6 +1802,7 @@ namespace bolt {
template<> inline NodeKind getNodeType<InstanceKeyword>() { return NodeKind::InstanceKeyword; } template<> inline NodeKind getNodeType<InstanceKeyword>() { return NodeKind::InstanceKeyword; }
template<> inline NodeKind getNodeType<ElifKeyword>() { return NodeKind::ElifKeyword; } template<> inline NodeKind getNodeType<ElifKeyword>() { return NodeKind::ElifKeyword; }
template<> inline NodeKind getNodeType<IfKeyword>() { return NodeKind::IfKeyword; } template<> inline NodeKind getNodeType<IfKeyword>() { return NodeKind::IfKeyword; }
template<> inline NodeKind getNodeType<MatchKeyword>() { return NodeKind::MatchKeyword; }
template<> inline NodeKind getNodeType<ElseKeyword>() { return NodeKind::ElseKeyword; } template<> inline NodeKind getNodeType<ElseKeyword>() { return NodeKind::ElseKeyword; }
template<> inline NodeKind getNodeType<Invalid>() { return NodeKind::Invalid; } template<> inline NodeKind getNodeType<Invalid>() { return NodeKind::Invalid; }
template<> inline NodeKind getNodeType<EndOfFile>() { return NodeKind::EndOfFile; } template<> inline NodeKind getNodeType<EndOfFile>() { return NodeKind::EndOfFile; }

View file

@ -80,6 +80,7 @@ namespace bolt {
Expression* parseInfixOperatorAfterExpression(Expression* LHS, int MinPrecedence); Expression* parseInfixOperatorAfterExpression(Expression* LHS, int MinPrecedence);
MatchExpression* parseMatchExpression();
Expression* parseMemberExpression(); Expression* parseMemberExpression();
Expression* parsePrimitiveExpression(); Expression* parsePrimitiveExpression();
@ -89,6 +90,10 @@ namespace bolt {
TypeExpression* parseQualifiedTypeExpression(); TypeExpression* parseQualifiedTypeExpression();
TypeExpression* parseArrowTypeExpression(); TypeExpression* parseArrowTypeExpression();
VarTypeExpression* parseVarTypeExpression(); VarTypeExpression* parseVarTypeExpression();
ReferenceTypeExpression* parseReferenceTypeExpression();
void checkLineFoldEnd();
void skipToLineFoldEnd();
public: public:

View file

@ -6,9 +6,9 @@
#include <deque> #include <deque>
#include <stack> #include <stack>
#include "bolt/Text.hpp"
#include "bolt/String.hpp" #include "bolt/String.hpp"
#include "bolt/Stream.hpp" #include "bolt/Stream.hpp"
#include "bolt/CST.hpp"
namespace bolt { namespace bolt {

View file

@ -1,4 +1,6 @@
// TODO make a more elegant mechanism to ref() and unref() items in a buffered stream
#pragma once #pragma once
#include <cstddef> #include <cstddef>
@ -60,13 +62,16 @@ namespace bolt {
} else { } else {
auto Keep = Buffer.front(); auto Keep = Buffer.front();
Buffer.pop_front(); Buffer.pop_front();
Keep->unref();
return Keep; return Keep;
} }
} }
value_type peek(std::size_t Offset = 0) override { value_type peek(std::size_t Offset = 0) override {
while (Buffer.size() <= Offset) { while (Buffer.size() <= Offset) {
Buffer.push_back(read()); auto Item = read();
Item->ref();
Buffer.push_back(Item);
} }
return Buffer[Offset]; return Buffer[Offset];
} }

View file

@ -1,4 +1,6 @@
// TODO check for memory leaks everywhere a nullptr is returned
#include <exception> #include <exception>
#include <vector> #include <vector>
@ -8,6 +10,7 @@
#include "bolt/Scanner.hpp" #include "bolt/Scanner.hpp"
#include "bolt/Parser.hpp" #include "bolt/Parser.hpp"
#include "bolt/Diagnostics.hpp" #include "bolt/Diagnostics.hpp"
#include "bolt/DiagnosticEngine.hpp"
namespace bolt { namespace bolt {
@ -79,10 +82,12 @@ namespace bolt {
} }
Token* Parser::expectToken(NodeKind Kind) { Token* Parser::expectToken(NodeKind Kind) {
auto T = Tokens.get(); auto T = Tokens.peek();
if (T->getKind() != Kind) { if (T->getKind() != Kind) {
throw UnexpectedTokenDiagnostic(File, T, std::vector<NodeKind> { Kind }); \ DE.add<UnexpectedTokenDiagnostic>(File, T, std::vector<NodeKind> { Kind });
return nullptr;
} }
Tokens.get();
return T; return T;
} }
@ -97,7 +102,9 @@ namespace bolt {
Tokens.get(); Tokens.get();
return new BindPattern(static_cast<Identifier*>(T0)); return new BindPattern(static_cast<Identifier*>(T0));
default: default:
throw UnexpectedTokenDiagnostic(File, T0, std::vector { NodeKind::Identifier, NodeKind::StringLiteral, NodeKind::IntegerLiteral }); Tokens.get();
DE.add<UnexpectedTokenDiagnostic>(File, T0, std::vector { NodeKind::Identifier, NodeKind::StringLiteral, NodeKind::IntegerLiteral });
return nullptr;
} }
} }
@ -115,18 +122,18 @@ namespace bolt {
switch (T0->getKind()) { switch (T0->getKind()) {
case NodeKind::RArrowAlt: case NodeKind::RArrowAlt:
HasConstraints = true; HasConstraints = true;
goto after_scan; goto after_lookahead;
case NodeKind::Equals: case NodeKind::Equals:
case NodeKind::BlockStart: case NodeKind::BlockStart:
case NodeKind::LineFoldEnd: case NodeKind::LineFoldEnd:
case NodeKind::EndOfFile: case NodeKind::EndOfFile:
goto after_scan; goto after_lookahead;
default: default:
break; break;
} }
} }
} }
after_scan: after_lookahead:
if (!HasConstraints) { if (!HasConstraints) {
return parseArrowTypeExpression(); return parseArrowTypeExpression();
} }
@ -135,36 +142,58 @@ after_scan:
std::vector<std::tuple<ConstraintExpression*, Comma*>> Constraints; std::vector<std::tuple<ConstraintExpression*, Comma*>> Constraints;
RParen* RParen; RParen* RParen;
RArrowAlt* RArrowAlt; RArrowAlt* RArrowAlt;
auto T1 = Tokens.peek();
if (T1->getKind() == NodeKind::RParen) {
Tokens.get();
RParen = static_cast<class RParen*>(T1);
goto after_constraints;
}
for (;;) { for (;;) {
ConstraintExpression* C; auto C = parseConstraintExpression();
auto T0 = Tokens.peek();
switch (T0->getKind()) {
case NodeKind::RParen:
Tokens.get();
RParen = static_cast<class RParen*>(T0);
RArrowAlt = expectToken<class RArrowAlt>();
goto after_constraints;
default:
C = parseConstraintExpression();
break;
}
Comma* Comma = nullptr; Comma* Comma = nullptr;
auto T1 = Tokens.get(); auto T2 = Tokens.get();
switch (T1->getKind()) { switch (T2->getKind()) {
case NodeKind::Comma: case NodeKind::Comma:
Constraints.push_back(std::make_tuple(C, static_cast<class Comma*>(T1))); {
auto Comma = static_cast<class Comma*>(T2);
if (C) {
Constraints.push_back(std::make_tuple(C, Comma));
} else {
Comma->unref();
}
continue; continue;
}
case NodeKind::RParen: case NodeKind::RParen:
RArrowAlt = static_cast<class RArrowAlt*>(T1); RParen = static_cast<class RParen*>(T2);
Constraints.push_back(std::make_tuple(C, nullptr)); if (C) {
RArrowAlt = expectToken<class RArrowAlt>(); Constraints.push_back(std::make_tuple(C, nullptr));
}
goto after_constraints; goto after_constraints;
default: default:
throw UnexpectedTokenDiagnostic(File, T1, std::vector { NodeKind::Comma, NodeKind::RArrowAlt }); DE.add<UnexpectedTokenDiagnostic>(File, T2, std::vector { NodeKind::Comma, NodeKind::RArrowAlt });
return nullptr;
} }
} }
after_constraints: after_constraints:
RArrowAlt = expectToken<class RArrowAlt>();
if (!RArrowAlt) {
LParen->unref();
for (auto [CE, Comma]: Constraints) {
CE->unref();
}
RParen->unref();
return nullptr;
}
auto TE = parseArrowTypeExpression(); auto TE = parseArrowTypeExpression();
if (!TE) {
LParen->unref();
for (auto [CE, Comma]: Constraints) {
CE->unref();
}
RParen->unref();
RArrowAlt->unref();
return nullptr;
}
return new QualifiedTypeExpression(Constraints, RArrowAlt, TE); return new QualifiedTypeExpression(Constraints, RArrowAlt, TE);
} }
@ -187,6 +216,14 @@ after_constraints:
break; break;
} }
auto TE = parseTypeExpression(); auto TE = parseTypeExpression();
if (!TE) {
LParen->unref();
for (auto [TE, Comma]: Elements) {
TE->unref();
Comma->unref();
}
return nullptr;
}
auto T2 = Tokens.get(); auto T2 = Tokens.get();
switch (T2->getKind()) { switch (T2->getKind()) {
case NodeKind::RParen: case NodeKind::RParen:
@ -197,7 +234,13 @@ after_constraints:
Elements.push_back({ TE, static_cast<Comma*>(T2) }); Elements.push_back({ TE, static_cast<Comma*>(T2) });
continue; continue;
default: default:
throw UnexpectedTokenDiagnostic(File, T2, { NodeKind::Comma, NodeKind::RParen }); DE.add<UnexpectedTokenDiagnostic>(File, T2, std::vector { NodeKind::Comma, NodeKind::RParen });
LParen->unref();
for (auto [TE, Comma]: Elements) {
TE->unref();
Comma->unref();
}
return nullptr;
} }
} }
after_tuple_element: after_tuple_element:
@ -207,25 +250,39 @@ after_tuple_element:
return new TupleTypeExpression { LParen, Elements, RParen }; return new TupleTypeExpression { LParen, Elements, RParen };
} }
case NodeKind::IdentifierAlt: case NodeKind::IdentifierAlt:
{ return parseReferenceTypeExpression();
std::vector<std::tuple<IdentifierAlt*, Dot*>> ModulePath;
auto Name = expectToken<IdentifierAlt>();
for (;;) {
auto T1 = Tokens.peek();
if (T1->getKind() != NodeKind::Dot) {
break;
}
Tokens.get();
ModulePath.push_back(std::make_tuple(static_cast<IdentifierAlt*>(Name), static_cast<Dot*>(T1)));
Name = expectToken<IdentifierAlt>();
}
return new ReferenceTypeExpression(ModulePath, static_cast<IdentifierAlt*>(Name));
}
default: default:
throw UnexpectedTokenDiagnostic(File, T0, std::vector { NodeKind::Identifier }); Tokens.get();
DE.add<UnexpectedTokenDiagnostic>(File, T0, std::vector { NodeKind::Identifier, NodeKind::IdentifierAlt, NodeKind::LParen });
return nullptr;
} }
} }
/// Parse a possibly module-qualified type reference: one or more
/// IdentifierAlt tokens separated by Dot tokens. Every name that is followed
/// by a dot is stored together with that dot in the module path; the final
/// name becomes the referenced type.
///
/// Returns nullptr when a name is missing. The failure itself is signalled
/// by expectToken(); the (name, dot) pairs collected so far are unref'd
/// before returning.
ReferenceTypeExpression* Parser::parseReferenceTypeExpression() {
  std::vector<std::tuple<IdentifierAlt*, Dot*>> Path;

  auto Name = expectToken<IdentifierAlt>();
  if (!Name) {
    return nullptr;
  }

  // As long as a dot follows, the name just read was a module qualifier.
  auto Sep = Tokens.peek();
  while (Sep->getKind() == NodeKind::Dot) {
    Tokens.get();
    Path.emplace_back(static_cast<IdentifierAlt*>(Name), static_cast<Dot*>(Sep));
    Name = expectToken<IdentifierAlt>();
    if (!Name) {
      // Nothing will take over the collected qualifiers, so release them.
      for (auto [Qualifier, QualifierDot]: Path) {
        Qualifier->unref();
        QualifierDot->unref();
      }
      return nullptr;
    }
    Sep = Tokens.peek();
  }

  return new ReferenceTypeExpression(Path, static_cast<IdentifierAlt*>(Name));
}
TypeExpression* Parser::parseArrowTypeExpression() { TypeExpression* Parser::parseArrowTypeExpression() {
auto RetType = parsePrimitiveTypeExpression(); auto RetType = parsePrimitiveTypeExpression();
std::vector<TypeExpression*> ParamTypes; std::vector<TypeExpression*> ParamTypes;
@ -237,6 +294,12 @@ after_tuple_element:
Tokens.get(); Tokens.get();
ParamTypes.push_back(RetType); ParamTypes.push_back(RetType);
RetType = parsePrimitiveTypeExpression(); RetType = parsePrimitiveTypeExpression();
if (!RetType) {
for (auto ParamType: ParamTypes) {
ParamType->unref();
}
return nullptr;
}
} }
if (!ParamTypes.empty()) { if (!ParamTypes.empty()) {
return new ArrowTypeExpression(ParamTypes, RetType); return new ArrowTypeExpression(ParamTypes, RetType);
@ -244,6 +307,81 @@ after_tuple_element:
return RetType; return RetType;
} }
/// Parse a `match` expression: the `match` keyword, an optional scrutinee
/// expression, a block start, and then zero or more cases of the form
/// `pattern <RArrowAlt> expression`, up to the closing block end.
///
/// Returns nullptr when the keyword, the scrutinee or the block start cannot
/// be parsed; whatever was already obtained is unref'd first.
///
/// A malformed case does not abort the whole expression: only the pieces of
/// that one case are released, the rest of its line fold is skipped, and
/// parsing resumes with the next case. The keyword, scrutinee, block start
/// and previously parsed cases stay untouched because they are handed to the
/// MatchExpression that is returned.
MatchExpression* Parser::parseMatchExpression() {
  // expectToken() already takes the keyword off the stream when it matches
  // (the other callers in this file read the following token right after
  // it), so no additional Tokens.get() may follow here.
  auto T0 = expectToken<MatchKeyword>();
  if (!T0) {
    return nullptr;
  }
  auto T1 = Tokens.peek();
  Expression* Value;
  BlockStart* BlockStart;
  if (llvm::isa<class BlockStart>(T1)) {
    // No scrutinee: the block follows the keyword directly.
    Value = nullptr;
    BlockStart = static_cast<class BlockStart*>(T1);
    Tokens.get();
  } else {
    Value = parseExpression();
    if (!Value) {
      T0->unref();
      return nullptr;
    }
    BlockStart = expectToken<class BlockStart>();
    if (!BlockStart) {
      T0->unref();
      Value->unref();
      return nullptr;
    }
  }
  std::vector<MatchCase*> Cases;
  for (;;) {
    auto T2 = Tokens.peek();
    if (llvm::isa<BlockEnd>(T2)) {
      Tokens.get();
      break;
    }
    if (T2->getKind() == NodeKind::EndOfFile) {
      // The block was never closed. Report it and stop instead of trying to
      // parse further cases out of an exhausted stream.
      DE.add<UnexpectedTokenDiagnostic>(File, T2, std::vector { NodeKind::BlockEnd });
      break;
    }
    auto Pattern = parsePattern();
    if (!Pattern) {
      skipToLineFoldEnd();
      continue;
    }
    auto RArrowAlt = expectToken<class RArrowAlt>();
    if (!RArrowAlt) {
      Pattern->unref();
      skipToLineFoldEnd();
      continue;
    }
    auto Expression = parseExpression();
    if (!Expression) {
      Pattern->unref();
      RArrowAlt->unref();
      skipToLineFoldEnd();
      continue;
    }
    checkLineFoldEnd();
    Cases.push_back(new MatchCase { Pattern, RArrowAlt, Expression });
  }
  return new MatchExpression(static_cast<MatchKeyword*>(T0), Value, BlockStart, Cases);
}
Expression* Parser::parsePrimitiveExpression() { Expression* Parser::parsePrimitiveExpression() {
auto T0 = Tokens.peek(); auto T0 = Tokens.peek();
switch (T0->getKind()) { switch (T0->getKind()) {
@ -258,12 +396,17 @@ after_tuple_element:
break; break;
} }
Tokens.get(); Tokens.get();
auto Dot = expectToken<class Dot>(); Tokens.get();
ModulePath.push_back(std::make_tuple(static_cast<IdentifierAlt*>(T1), Dot)); ModulePath.push_back(std::make_tuple(static_cast<IdentifierAlt*>(T1), static_cast<class Dot*>(T2)));
} }
auto T3 = Tokens.get(); auto T3 = Tokens.get();
if (!llvm::isa<Symbol>(T3)) { if (!llvm::isa<Symbol>(T3)) {
throw UnexpectedTokenDiagnostic(File, T3, { NodeKind::Identifier, NodeKind::IdentifierAlt }); for (auto [Name, Dot]: ModulePath) {
Name->unref();
Dot->unref();
}
DE.add<UnexpectedTokenDiagnostic>(File, T3, std::vector { NodeKind::Identifier, NodeKind::IdentifierAlt });
return nullptr;
} }
return new ReferenceExpression(ModulePath, static_cast<Symbol*>(T3)); return new ReferenceExpression(ModulePath, static_cast<Symbol*>(T3));
} }
@ -273,73 +416,73 @@ after_tuple_element:
std::vector<std::tuple<Expression*, Comma*>> Elements; std::vector<std::tuple<Expression*, Comma*>> Elements;
auto LParen = static_cast<class LParen*>(T0); auto LParen = static_cast<class LParen*>(T0);
RParen* RParen; RParen* RParen;
auto T1 = Tokens.peek();
if (llvm::isa<class RParen>(T1)) {
Tokens.get();
RParen = static_cast<class RParen*>(T1);
goto after_tuple_elements;
}
for (;;) { for (;;) {
auto T1 = Tokens.peek(); auto T1 = Tokens.peek();
if (llvm::isa<class RParen>(T1)) {
Tokens.get();
RParen = static_cast<class RParen*>(T1);
break;
}
auto E = parseExpression(); auto E = parseExpression();
if (!E) {
LParen->unref();
for (auto [E, Comma]: Elements) {
E->unref();
Comma->unref();
}
return nullptr;
}
auto T2 = Tokens.get(); auto T2 = Tokens.get();
switch (T2->getKind()) { switch (T2->getKind()) {
case NodeKind::RParen: case NodeKind::RParen:
RParen = static_cast<class RParen*>(T2); RParen = static_cast<class RParen*>(T2);
Elements.push_back({ E, nullptr }); Elements.push_back({ E, nullptr });
goto finish; goto after_tuple_elements;
case NodeKind::Comma: case NodeKind::Comma:
Elements.push_back({ E, static_cast<class Comma*>(T2) }); Elements.push_back({ E, static_cast<class Comma*>(T2) });
break; break;
default: default:
throw UnexpectedTokenDiagnostic(File, T2, { NodeKind::RParen, NodeKind::Comma }); DE.add<UnexpectedTokenDiagnostic>(File, T2, std::vector { NodeKind::RParen, NodeKind::Comma });
LParen->unref();
for (auto [E, Comma]: Elements) {
E->unref();
Comma->unref();
}
return nullptr;
case NodeKind::LineFoldEnd:
case NodeKind::BlockStart:
case NodeKind::EndOfFile:
// Can recover from this one
RParen = nullptr;
DE.add<UnexpectedTokenDiagnostic>(File, T2, std::vector { NodeKind::RParen, NodeKind::Comma });
goto after_tuple_elements;
} }
} }
finish: after_tuple_elements:
if (Elements.size() == 1 && !std::get<1>(Elements.front())) { if (Elements.size() == 1 && !std::get<1>(Elements.front())) {
return new NestedExpression(LParen, std::get<0>(Elements.front()), RParen); return new NestedExpression(LParen, std::get<0>(Elements.front()), RParen);
} }
return new TupleExpression { LParen, Elements, RParen }; return new TupleExpression { LParen, Elements, RParen };
} }
case NodeKind::MatchKeyword: case NodeKind::MatchKeyword:
{ return parseMatchExpression();
Tokens.get();
auto T1 = Tokens.peek();
Expression* Value;
BlockStart* BlockStart;
if (llvm::isa<class BlockStart>(T1)) {
Value = nullptr;
BlockStart = static_cast<class BlockStart*>(T1);
Tokens.get();
} else {
Value = parseExpression();
BlockStart = expectToken<class BlockStart>();
}
std::vector<MatchCase*> Cases;
for (;;) {
auto T2 = Tokens.peek();
if (llvm::isa<BlockEnd>(T2)) {
Tokens.get();
break;
}
auto Pattern = parsePattern();
auto RArrowAlt = expectToken<class RArrowAlt>();
auto Expression = parseExpression();
expectToken<LineFoldEnd>();
Cases.push_back(new MatchCase { Pattern, RArrowAlt, Expression });
}
return new MatchExpression(static_cast<MatchKeyword*>(T0), Value, BlockStart, Cases);
}
case NodeKind::IntegerLiteral: case NodeKind::IntegerLiteral:
case NodeKind::StringLiteral: case NodeKind::StringLiteral:
Tokens.get(); Tokens.get();
return new ConstantExpression(static_cast<Literal*>(T0)); return new ConstantExpression(static_cast<Literal*>(T0));
default: default:
throw UnexpectedTokenDiagnostic(File, T0, { NodeKind::MatchKeyword, NodeKind::Identifier, NodeKind::IdentifierAlt, NodeKind::IntegerLiteral, NodeKind::StringLiteral }); Tokens.get();
DE.add<UnexpectedTokenDiagnostic>(File, T0, std::vector { NodeKind::MatchKeyword, NodeKind::Identifier, NodeKind::IdentifierAlt, NodeKind::LParen, NodeKind::IntegerLiteral, NodeKind::StringLiteral });
return nullptr;
} }
} }
Expression* Parser::parseMemberExpression() { Expression* Parser::parseMemberExpression() {
auto E = parsePrimitiveExpression(); auto E = parsePrimitiveExpression();
if (!E) {
return nullptr;
}
for (;;) { for (;;) {
auto T1 = Tokens.peek(0); auto T1 = Tokens.peek(0);
auto T2 = Tokens.peek(1); auto T2 = Tokens.peek(1);
@ -363,13 +506,24 @@ finish:
Expression* Parser::parseCallExpression() { Expression* Parser::parseCallExpression() {
auto Operator = parseMemberExpression(); auto Operator = parseMemberExpression();
if (!Operator) {
return nullptr;
}
std::vector<Expression*> Args; std::vector<Expression*> Args;
for (;;) { for (;;) {
auto T1 = Tokens.peek(); auto T1 = Tokens.peek();
if (T1->getKind() == NodeKind::LineFoldEnd || T1->getKind() == NodeKind::RParen || T1->getKind() == NodeKind::BlockStart || T1->getKind() == NodeKind::Comma || ExprOperators.isInfix(T1)) { if (T1->getKind() == NodeKind::LineFoldEnd || T1->getKind() == NodeKind::RParen || T1->getKind() == NodeKind::BlockStart || T1->getKind() == NodeKind::Comma || ExprOperators.isInfix(T1)) {
break; break;
} }
Args.push_back(parsePrimitiveExpression()); auto Arg = parsePrimitiveExpression();
if (!Arg) {
Operator->unref();
for (auto Arg: Args) {
Arg->unref();
}
return nullptr;
}
Args.push_back(Arg);
} }
if (Args.empty()) { if (Args.empty()) {
return Operator; return Operator;
@ -388,13 +542,19 @@ finish:
Prefix.push_back(T0); Prefix.push_back(T0);
} }
auto E = parseCallExpression(); auto E = parseCallExpression();
if (!E) {
for (auto Tok: Prefix) {
Tok->unref();
}
return nullptr;
}
for (auto Iter = Prefix.rbegin(); Iter != Prefix.rend(); Iter++) { for (auto Iter = Prefix.rbegin(); Iter != Prefix.rend(); Iter++) {
E = new PrefixExpression(*Iter, E); E = new PrefixExpression(*Iter, E);
} }
return E; return E;
} }
Expression* Parser::parseInfixOperatorAfterExpression(Expression* LHS, int MinPrecedence) { Expression* Parser::parseInfixOperatorAfterExpression(Expression* Left, int MinPrecedence) {
for (;;) { for (;;) {
auto T0 = Tokens.peek(); auto T0 = Tokens.peek();
auto Info0 = ExprOperators.getInfix(T0); auto Info0 = ExprOperators.getInfix(T0);
@ -402,46 +562,88 @@ finish:
break; break;
} }
Tokens.get(); Tokens.get();
auto RHS = parseUnaryExpression(); auto Right = parseUnaryExpression();
if (!Right) {
Left->unref();
T0->unref();
return nullptr;
}
for (;;) { for (;;) {
auto T1 = Tokens.peek(); auto T1 = Tokens.peek();
auto Info1 = ExprOperators.getInfix(T1); auto Info1 = ExprOperators.getInfix(T1);
if (!Info1 || Info1->Precedence < Info0->Precedence && (Info1->Precedence > Info0->Precedence || Info1->isRightAssoc())) { if (!Info1 || Info1->Precedence < Info0->Precedence && (Info1->Precedence > Info0->Precedence || Info1->isRightAssoc())) {
break; break;
} }
RHS = parseInfixOperatorAfterExpression(RHS, Info1->Precedence); auto NewRight = parseInfixOperatorAfterExpression(Right, Info1->Precedence);
if (!NewRight) {
Left->unref();
T0->unref();
Right->unref();
return nullptr;
}
Right = NewRight;
} }
LHS = new InfixExpression(LHS, T0, RHS); Left = new InfixExpression(Left, T0, Right);
} }
return LHS; return Left;
} }
Expression* Parser::parseExpression() { Expression* Parser::parseExpression() {
return parseInfixOperatorAfterExpression(parseUnaryExpression(), 0); auto Left = parseUnaryExpression();
if (!Left) {
return nullptr;
}
return parseInfixOperatorAfterExpression(Left, 0);
} }
ExpressionStatement* Parser::parseExpressionStatement() { ExpressionStatement* Parser::parseExpressionStatement() {
auto E = parseExpression(); auto E = parseExpression();
BOLT_EXPECT_TOKEN(LineFoldEnd); if (!E) {
skipToLineFoldEnd();
return nullptr;
}
checkLineFoldEnd();
return new ExpressionStatement(E); return new ExpressionStatement(E);
} }
ReturnStatement* Parser::parseReturnStatement() { ReturnStatement* Parser::parseReturnStatement() {
auto T0 = static_cast<ReturnKeyword*>(expectToken(NodeKind::ReturnKeyword)); auto ReturnKeyword = expectToken<class ReturnKeyword>();
Expression* Expression = nullptr; if (!ReturnKeyword) {
auto T1 = Tokens.peek(); return nullptr;
if (T1->getKind() != NodeKind::LineFoldEnd) {
Expression = parseExpression();
} }
BOLT_EXPECT_TOKEN(LineFoldEnd); Expression* Expression;
return new ReturnStatement(static_cast<ReturnKeyword*>(T0), Expression); auto T1 = Tokens.peek();
if (T1->getKind() == NodeKind::LineFoldEnd) {
Tokens.get()->unref();
Expression = nullptr;
} else {
Expression = parseExpression();
if (!Expression) {
ReturnKeyword->unref();
skipToLineFoldEnd();
return nullptr;
}
checkLineFoldEnd();
}
return new ReturnStatement(ReturnKeyword, Expression);
} }
IfStatement* Parser::parseIfStatement() { IfStatement* Parser::parseIfStatement() {
std::vector<IfStatementPart*> Parts; std::vector<IfStatementPart*> Parts;
auto T0 = expectToken(NodeKind::IfKeyword); auto IfKeyword = expectToken<class IfKeyword>();
auto Test = parseExpression(); auto Test = parseExpression();
auto T1 = static_cast<BlockStart*>(expectToken(NodeKind::BlockStart)); if (!Test) {
IfKeyword->unref();
skipToLineFoldEnd();
return nullptr;
}
auto T1 = expectToken<BlockStart>();
if (!T1) {
IfKeyword->unref();
Test->unref();
skipToLineFoldEnd();
return nullptr;
}
std::vector<Node*> Then; std::vector<Node*> Then;
for (;;) { for (;;) {
auto T2 = Tokens.peek(); auto T2 = Tokens.peek();
@ -449,14 +651,23 @@ finish:
Tokens.get(); Tokens.get();
break; break;
} }
Then.push_back(parseLetBodyElement()); auto Element = parseLetBodyElement();
if (Element) {
Then.push_back(Element);
}
} }
Parts.push_back(new IfStatementPart(T0, Test, T1, Then)); Tokens.get(); // Always a LineFoldEnd
BOLT_EXPECT_TOKEN(LineFoldEnd) Parts.push_back(new IfStatementPart(IfKeyword, Test, T1, Then));
auto T3 = Tokens.peek(); auto T3 = Tokens.peek();
if (T3->getKind() == NodeKind::ElseKeyword) { if (T3->getKind() == NodeKind::ElseKeyword) {
Tokens.get(); Tokens.get();
auto T4 = static_cast<BlockStart*>(expectToken(NodeKind::BlockStart)); auto T4 = expectToken<BlockStart>();
if (!T4) {
for (auto Part: Parts) {
Part->unref();
}
return nullptr;
}
std::vector<Node*> Else; std::vector<Node*> Else;
for (;;) { for (;;) {
auto T5 = Tokens.peek(); auto T5 = Tokens.peek();
@ -464,10 +675,13 @@ finish:
Tokens.get(); Tokens.get();
break; break;
} }
Else.push_back(parseLetBodyElement()); auto Element = parseLetBodyElement();
if (Element) {
Else.push_back(Element);
}
} }
Tokens.get(); // Always a LineFoldEnd
Parts.push_back(new IfStatementPart(T3, nullptr, T4, Else)); Parts.push_back(new IfStatementPart(T3, nullptr, T4, Else));
BOLT_EXPECT_TOKEN(LineFoldEnd)
} }
return new IfStatement(Parts); return new IfStatement(Parts);
} }
@ -477,13 +691,21 @@ finish:
PubKeyword* Pub = nullptr; PubKeyword* Pub = nullptr;
LetKeyword* Let; LetKeyword* Let;
MutKeyword* Mut = nullptr; MutKeyword* Mut = nullptr;
TypeAssert* TA = nullptr;
LetBody* Body = nullptr;
auto T0 = Tokens.get(); auto T0 = Tokens.get();
if (T0->getKind() == NodeKind::PubKeyword) { if (T0->getKind() == NodeKind::PubKeyword) {
Pub = static_cast<PubKeyword*>(T0); Pub = static_cast<PubKeyword*>(T0);
T0 = Tokens.get(); T0 = Tokens.get();
} }
if (T0->getKind() != NodeKind::LetKeyword) { if (T0->getKind() != NodeKind::LetKeyword) {
throw UnexpectedTokenDiagnostic(File, T0, std::vector { NodeKind::LetKeyword }); DE.add<UnexpectedTokenDiagnostic>(File, T0, std::vector { NodeKind::LetKeyword });
if (Pub) {
Pub->unref();
}
skipToLineFoldEnd();
return nullptr;
} }
Let = static_cast<LetKeyword*>(T0); Let = static_cast<LetKeyword*>(T0);
auto T1 = Tokens.peek(); auto T1 = Tokens.peek();
@ -493,6 +715,17 @@ finish:
} }
auto Patt = parsePattern(); auto Patt = parsePattern();
if (!Patt) {
if (Pub) {
Pub->unref();
}
Let->unref();
if (Mut) {
Mut->unref();
}
skipToLineFoldEnd();
return nullptr;
}
std::vector<Parameter*> Params; std::vector<Parameter*> Params;
Token* T2; Token* T2;
@ -505,21 +738,28 @@ finish:
case NodeKind::Colon: case NodeKind::Colon:
goto after_params; goto after_params;
default: default:
Params.push_back(new Parameter(parsePattern(), nullptr)); auto P = parsePattern();
if (P == nullptr) {
P = new BindPattern(new Identifier("_"));
}
Params.push_back(new Parameter(P, nullptr));
} }
} }
after_params: after_params:
TypeAssert* TA = nullptr;
if (T2->getKind() == NodeKind::Colon) { if (T2->getKind() == NodeKind::Colon) {
Tokens.get(); Tokens.get();
auto TE = parseTypeExpression(); auto TE = parseTypeExpression();
TA = new TypeAssert(static_cast<Colon*>(T2), TE); if (TE) {
TA = new TypeAssert(static_cast<Colon*>(T2), TE);
} else {
skipToLineFoldEnd();
goto finish;
}
T2 = Tokens.peek(); T2 = Tokens.peek();
} }
LetBody* Body;
switch (T2->getKind()) { switch (T2->getKind()) {
case NodeKind::BlockStart: case NodeKind::BlockStart:
{ {
@ -530,18 +770,29 @@ after_params:
if (T3->getKind() == NodeKind::BlockEnd) { if (T3->getKind() == NodeKind::BlockEnd) {
break; break;
} }
Elements.push_back(parseLetBodyElement()); auto Element = parseLetBodyElement();
if (Element) {
Elements.push_back(Element);
}
} }
Tokens.get(); Tokens.get();
Body = new LetBlockBody(static_cast<BlockStart*>(T2), Elements); Body = new LetBlockBody(static_cast<BlockStart*>(T2), Elements);
break; break;
} }
case NodeKind::Equals: case NodeKind::Equals:
{
Tokens.get(); Tokens.get();
Body = new LetExprBody(static_cast<Equals*>(T2), parseExpression()); auto E = parseExpression();
if (E == nullptr) {
skipToLineFoldEnd();
goto finish;
}
if (E) {
Body = new LetExprBody(static_cast<Equals*>(T2), E);
}
break; break;
}
case NodeKind::LineFoldEnd: case NodeKind::LineFoldEnd:
Body = nullptr;
break; break;
default: default:
std::vector<NodeKind> Expected { NodeKind::BlockStart, NodeKind::LineFoldEnd, NodeKind::Equals }; std::vector<NodeKind> Expected { NodeKind::BlockStart, NodeKind::LineFoldEnd, NodeKind::Equals };
@ -551,11 +802,14 @@ after_params:
// First tokens of Pattern // First tokens of Pattern
Expected.push_back(NodeKind::Identifier); Expected.push_back(NodeKind::Identifier);
} }
throw UnexpectedTokenDiagnostic(File, T2, Expected); DE.add<UnexpectedTokenDiagnostic>(File, T2, Expected);
} }
BOLT_EXPECT_TOKEN(LineFoldEnd); after_body:
checkLineFoldEnd();
finish:
return new LetDeclaration( return new LetDeclaration(
Pub, Pub,
Let, Let,
@ -588,24 +842,39 @@ after_params:
switch (Tok->getKind()) { switch (Tok->getKind()) {
case NodeKind::Tilde: case NodeKind::Tilde:
HasTilde = true; HasTilde = true;
goto after_seek; goto after_lookahead;
case NodeKind::RParen: case NodeKind::RParen:
case NodeKind::Comma: case NodeKind::Comma:
case NodeKind::RArrowAlt: case NodeKind::RArrowAlt:
case NodeKind::EndOfFile: case NodeKind::EndOfFile:
goto after_seek; goto after_lookahead;
default: default:
continue; continue;
} }
} }
after_seek: after_lookahead:
if (HasTilde) { if (HasTilde) {
auto Left = parseArrowTypeExpression(); auto Left = parseArrowTypeExpression();
if (!Left) {
return nullptr;
}
auto Tilde = expectToken<class Tilde>(); auto Tilde = expectToken<class Tilde>();
if (!Tilde) {
Left->unref();
return nullptr;
}
auto Right = parseArrowTypeExpression(); auto Right = parseArrowTypeExpression();
if (!Right) {
Left->unref();
Tilde->unref();
return nullptr;
}
return new EqualityConstraintExpression { Left, Tilde, Right }; return new EqualityConstraintExpression { Left, Tilde, Right };
} }
auto Name = expectToken<IdentifierAlt>(); auto Name = expectToken<IdentifierAlt>();
if (!Name) {
return nullptr;
}
std::vector<VarTypeExpression*> TEs; std::vector<VarTypeExpression*> TEs;
for (;;) { for (;;) {
auto T1 = Tokens.peek(); auto T1 = Tokens.peek();
@ -619,7 +888,9 @@ after_seek:
TEs.push_back(new VarTypeExpression { static_cast<Identifier*>(T1) }); TEs.push_back(new VarTypeExpression { static_cast<Identifier*>(T1) });
break; break;
default: default:
throw UnexpectedTokenDiagnostic(File, T1, std::vector { NodeKind::RParen, NodeKind::RArrowAlt, NodeKind::Comma, NodeKind::Identifier }); DE.add<UnexpectedTokenDiagnostic>(File, T1, std::vector { NodeKind::RParen, NodeKind::RArrowAlt, NodeKind::Comma, NodeKind::Identifier });
Name->unref();
return nullptr;
} }
} }
after_vars: after_vars:
@ -628,22 +899,60 @@ after_vars:
VarTypeExpression* Parser::parseVarTypeExpression() { VarTypeExpression* Parser::parseVarTypeExpression() {
auto Name = expectToken<Identifier>(); auto Name = expectToken<Identifier>();
// TODO reject constructor symbols (starting with a capital letter) if (!Name) {
return nullptr;
}
for (auto Ch: Name->Text) {
if (!std::islower(Ch)) {
// TODO
// DE.add<TypeVarMustContainLowercaseLettersDiagnostic>(Name);
Name->unref();
return nullptr;
}
}
return new VarTypeExpression { Name }; return new VarTypeExpression { Name };
} }
InstanceDeclaration* Parser::parseInstanceDeclaration() { InstanceDeclaration* Parser::parseInstanceDeclaration() {
auto InstanceKeyword = expectToken<class InstanceKeyword>(); auto InstanceKeyword = expectToken<class InstanceKeyword>();
if (!InstanceKeyword) {
skipToLineFoldEnd();
return nullptr;
}
auto Name = expectToken<IdentifierAlt>(); auto Name = expectToken<IdentifierAlt>();
if (!Name) {
InstanceKeyword->unref();
skipToLineFoldEnd();
return nullptr;
}
std::vector<TypeExpression*> TypeExps; std::vector<TypeExpression*> TypeExps;
for (;;) { for (;;) {
auto T1 = Tokens.peek(); auto T1 = Tokens.peek();
if (T1->is<BlockStart>()) { if (T1->is<BlockStart>()) {
break; break;
} }
TypeExps.push_back(parseTypeExpression()); auto TE = parseTypeExpression();
if (!TE) {
InstanceKeyword->unref();
Name->unref();
for (auto TE: TypeExps) {
TE->unref();
}
skipToLineFoldEnd();
return nullptr;
}
TypeExps.push_back(TE);
} }
auto BlockStart = expectToken<class BlockStart>(); auto BlockStart = expectToken<class BlockStart>();
if (!BlockStart) {
InstanceKeyword->unref();
Name->unref();
for (auto TE: TypeExps) {
TE->unref();
}
skipToLineFoldEnd();
return nullptr;
}
std::vector<Node*> Elements; std::vector<Node*> Elements;
for (;;) { for (;;) {
auto T2 = Tokens.peek(); auto T2 = Tokens.peek();
@ -651,9 +960,12 @@ after_vars:
Tokens.get(); Tokens.get();
break; break;
} }
Elements.push_back(parseClassElement()); auto Element = parseClassElement();
if (Element) {
Elements.push_back(Element);
}
} }
expectToken(NodeKind::LineFoldEnd); checkLineFoldEnd();
return new InstanceDeclaration( return new InstanceDeclaration(
InstanceKeyword, InstanceKeyword,
Name, Name,
@ -671,16 +983,54 @@ after_vars:
PubKeyword = static_cast<class PubKeyword*>(T0); PubKeyword = static_cast<class PubKeyword*>(T0);
} }
auto ClassKeyword = expectToken<class ClassKeyword>(); auto ClassKeyword = expectToken<class ClassKeyword>();
if (!ClassKeyword) {
if (PubKeyword) {
PubKeyword->unref();
}
skipToLineFoldEnd();
return nullptr;
}
auto Name = expectToken<IdentifierAlt>(); auto Name = expectToken<IdentifierAlt>();
if (!Name) {
if (PubKeyword) {
PubKeyword->unref();
}
ClassKeyword->unref();
skipToLineFoldEnd();
return nullptr;
}
std::vector<VarTypeExpression*> TypeVars; std::vector<VarTypeExpression*> TypeVars;
for (;;) { for (;;) {
auto T2 = Tokens.peek(); auto T2 = Tokens.peek();
if (T2->getKind() == NodeKind::BlockStart) { if (T2->getKind() == NodeKind::BlockStart) {
break; break;
} }
TypeVars.push_back(parseVarTypeExpression()); auto TE = parseVarTypeExpression();
if (!TE) {
if (PubKeyword) {
PubKeyword->unref();
}
ClassKeyword->unref();
for (auto TV: TypeVars) {
TV->unref();
}
skipToLineFoldEnd();
return nullptr;
}
TypeVars.push_back(TE);
} }
auto BlockStart = expectToken<class BlockStart>(); auto BlockStart = expectToken<class BlockStart>();
if (!BlockStart) {
if (PubKeyword) {
PubKeyword->unref();
}
ClassKeyword->unref();
for (auto TV: TypeVars) {
TV->unref();
}
skipToLineFoldEnd();
return nullptr;
}
std::vector<Node*> Elements; std::vector<Node*> Elements;
for (;;) { for (;;) {
auto T2 = Tokens.peek(); auto T2 = Tokens.peek();
@ -688,9 +1038,12 @@ after_vars:
Tokens.get(); Tokens.get();
break; break;
} }
Elements.push_back(parseClassElement()); auto Element = parseClassElement();
if (Element) {
Elements.push_back(Element);
}
} }
expectToken(NodeKind::LineFoldEnd); Tokens.get(); // Always a LineFoldEnd
return new ClassDeclaration( return new ClassDeclaration(
PubKeyword, PubKeyword,
ClassKeyword, ClassKeyword,
@ -736,10 +1089,49 @@ after_vars:
if (T0->is<EndOfFile>()) { if (T0->is<EndOfFile>()) {
break; break;
} }
Elements.push_back(parseSourceElement()); auto Element = parseSourceElement();
if (Element) {
Elements.push_back(Element);
}
} }
return new SourceFile(File, Elements); return new SourceFile(File, Elements);
} }
/// Error recovery helper: discard tokens up to and including the
/// LineFoldEnd that closes the current line fold.
///
/// Blocks opened while skipping are tracked so that a LineFoldEnd inside a
/// nested block does not end the skip early. Skipping also stops when
/// EndOfFile is read. Every discarded token is unref'd except EndOfFile.
void Parser::skipToLineFoldEnd() {
  unsigned Level = 0;
  for (;;) {
    auto T0 = Tokens.get();
    // Read the kind before the token is released below.
    const auto Kind = T0->getKind();
    if (Kind == NodeKind::EndOfFile) {
      return;
    }
    T0->unref();
    switch (Kind) {
      case NodeKind::LineFoldEnd:
        if (Level == 0) {
          return;
        }
        break;
      case NodeKind::BlockStart:
        Level++;
        break;
      case NodeKind::BlockEnd:
        // A BlockEnd without a BlockStart seen by this loop must not
        // decrement: Level is unsigned and would wrap around, after which
        // no LineFoldEnd could ever stop the skip before end of file.
        if (Level > 0) {
          Level--;
        }
        break;
      default:
        break;
    }
  }
}
/// Take the next token off the stream and require it to be a LineFoldEnd.
/// Any other token is reported as unexpected, after which the remainder of
/// the line fold is skipped.
void Parser::checkLineFoldEnd() {
  auto Next = Tokens.get();
  if (Next->getKind() == NodeKind::LineFoldEnd) {
    return;
  }
  DE.add<UnexpectedTokenDiagnostic>(File, Next, std::vector { NodeKind::LineFoldEnd });
  skipToLineFoldEnd();
}
} }

View file

@ -72,6 +72,7 @@ namespace bolt {
{ "match", NodeKind::MatchKeyword }, { "match", NodeKind::MatchKeyword },
{ "class", NodeKind::ClassKeyword }, { "class", NodeKind::ClassKeyword },
{ "instance", NodeKind::InstanceKeyword }, { "instance", NodeKind::InstanceKeyword },
{ "struct", NodeKind::StructKeyword },
}; };
Scanner::Scanner(TextFile& File, Stream<Char>& Chars): Scanner::Scanner(TextFile& File, Stream<Char>& Chars):
@ -242,6 +243,8 @@ digit_finish:
return new ClassKeyword(StartLoc); return new ClassKeyword(StartLoc);
case NodeKind::InstanceKeyword: case NodeKind::InstanceKeyword:
return new InstanceKeyword(StartLoc); return new InstanceKeyword(StartLoc);
case NodeKind::StructKeyword:
return new StructKeyword(StartLoc);
default: default:
ZEN_UNREACHABLE ZEN_UNREACHABLE
} }