Enable parsing of block expressions

Part of fixes for issue #65
This commit is contained in:
Sam Vervaeck 2024-07-10 12:03:24 +02:00
parent 9af655c072
commit 449991d0c9
Signed by: samvv
SSH key fingerprint: SHA256:dIg0ywU1OP+ZYifrYxy8c5esO72cIKB+4/9wkZj1VaY
7 changed files with 181 additions and 25 deletions

View file

@ -1,6 +1,7 @@
#ifndef BOLT_CST_HPP #ifndef BOLT_CST_HPP
#define BOLT_CST_HPP #define BOLT_CST_HPP
#include <stdint.h>
#include <cmath> #include <cmath>
#include <cstdlib> #include <cstdlib>
#include <unordered_map> #include <unordered_map>
@ -28,8 +29,8 @@ class Statement;
class TextLoc { class TextLoc {
public: public:
size_t Line = 1; std::size_t Line = 1;
size_t Column = 1; std::size_t Column = 1;
inline bool isEmpty() const noexcept { inline bool isEmpty() const noexcept {
return Line == 0 && Column == 0; return Line == 0 && Column == 0;
@ -68,18 +69,18 @@ class TextFile {
ByteString Path; ByteString Path;
ByteString Text; ByteString Text;
std::vector<size_t> LineOffsets; std::vector<std::size_t> LineOffsets;
public: public:
TextFile(ByteString Path, ByteString Text); TextFile(ByteString Path, ByteString Text);
size_t getLine(size_t Offset) const; std::size_t getLine(std::size_t Offset) const;
size_t getColumn(size_t Offset) const; std::size_t getColumn(std::size_t Offset) const;
size_t getStartOffsetOfLine(size_t Line) const; std::size_t getStartOffsetOfLine(std::size_t Line) const;
size_t getEndOffsetOfLine(size_t Line) const; std::size_t getEndOffsetOfLine(std::size_t Line) const;
size_t getLineCount() const; std::size_t getLineCount() const;
ByteString getPath() const; ByteString getPath() const;
@ -96,6 +97,7 @@ enum class NodeKind {
DotDot, DotDot,
Tilde, Tilde,
At, At,
DoKeyword,
LParen, LParen,
RParen, RParen,
LBracket, LBracket,
@ -156,6 +158,7 @@ enum class NodeKind {
ReferenceExpression, ReferenceExpression,
MatchCase, MatchCase,
MatchExpression, MatchExpression,
BlockExpression,
MemberExpression, MemberExpression,
TupleExpression, TupleExpression,
NestedExpression, NestedExpression,
@ -347,19 +350,19 @@ public:
TextLoc getEndLoc() const; TextLoc getEndLoc() const;
inline size_t getStartLine() const override { inline std::size_t getStartLine() const override {
return StartLoc.Line; return StartLoc.Line;
} }
inline size_t getStartColumn() const override { inline std::size_t getStartColumn() const override {
return StartLoc.Column; return StartLoc.Column;
} }
inline size_t getEndLine() const override { inline std::size_t getEndLine() const override {
return getEndLoc().Line; return getEndLoc().Line;
} }
inline size_t getEndColumn() const override { inline std::size_t getEndColumn() const override {
return getEndLoc().Column; return getEndLoc().Column;
} }
@ -481,6 +484,20 @@ public:
}; };
/// Token subclass whose node kind is NodeKind::DoKeyword.
class DoKeyword : public Token {
public:

  /// Creates the token with the given start location.
  DoKeyword(TextLoc StartLoc)
      : Token(NodeKind::DoKeyword, StartLoc) {}

  /// Textual form of this token; defined out of line.
  std::string getText() const override;

  /// Kind test: true when N reports NodeKind::DoKeyword.
  static bool classof(const Node* N) {
    const bool IsDoKeyword = (NodeKind::DoKeyword == N->getKind());
    return IsDoKeyword;
  }

};
class LParen : public Token { class LParen : public Token {
public: public:
@ -1193,7 +1210,15 @@ public:
class AnnotationContainer { class AnnotationContainer {
public: public:
std::vector<Annotation*> Annotations; std::vector<Annotation*> Annotations;
/// Default construction: the annotation list starts out empty.
AnnotationContainer()
    : Annotations() {}
inline AnnotationContainer(std::vector<Annotation*> Annotations):
Annotations(Annotations) {}
}; };
class ExpressionAnnotation : public Annotation { class ExpressionAnnotation : public Annotation {
@ -1764,6 +1789,7 @@ public:
|| N->getKind() == NodeKind::InfixExpression || N->getKind() == NodeKind::InfixExpression
|| N->getKind() == NodeKind::RecordExpression || N->getKind() == NodeKind::RecordExpression
|| N->getKind() == NodeKind::MatchExpression || N->getKind() == NodeKind::MatchExpression
|| N->getKind() == NodeKind::BlockExpression
|| N->getKind() == NodeKind::MemberExpression || N->getKind() == NodeKind::MemberExpression
|| N->getKind() == NodeKind::LiteralExpression || N->getKind() == NodeKind::LiteralExpression
|| N->getKind() == NodeKind::PrefixExpression; || N->getKind() == NodeKind::PrefixExpression;
@ -1871,6 +1897,37 @@ public:
}; };
class BlockExpression : public Expression {
public:
class DoKeyword* DoKeyword;
class BlockStart* BlockStart;
std::vector<Node*> Elements;
inline BlockExpression(
class DoKeyword* DoKeyword,
class BlockStart* BlockStart,
std::vector<Node*> Elements
): Expression(NodeKind::BlockExpression),
DoKeyword(DoKeyword),
BlockStart(BlockStart),
Elements(Elements) {}
inline BlockExpression(
std::vector<Annotation*> Annotations,
class DoKeyword* DoKeyword,
class BlockStart* BlockStart,
std::vector<Node*> Elements
): Expression(NodeKind::BlockExpression, Annotations),
DoKeyword(DoKeyword),
BlockStart(BlockStart),
Elements(Elements) {}
Token* getFirstToken() const override;
Token* getLastToken() const override;
};
class MemberExpression : public Expression { class MemberExpression : public Expression {
public: public:

View file

@ -28,6 +28,7 @@ public:
BOLT_GEN_CASE(DotDot) BOLT_GEN_CASE(DotDot)
BOLT_GEN_CASE(Tilde) BOLT_GEN_CASE(Tilde)
BOLT_GEN_CASE(At) BOLT_GEN_CASE(At)
BOLT_GEN_CASE(DoKeyword)
BOLT_GEN_CASE(LParen) BOLT_GEN_CASE(LParen)
BOLT_GEN_CASE(RParen) BOLT_GEN_CASE(RParen)
BOLT_GEN_CASE(LBracket) BOLT_GEN_CASE(LBracket)
@ -88,6 +89,7 @@ public:
BOLT_GEN_CASE(ReferenceExpression) BOLT_GEN_CASE(ReferenceExpression)
BOLT_GEN_CASE(MatchCase) BOLT_GEN_CASE(MatchCase)
BOLT_GEN_CASE(MatchExpression) BOLT_GEN_CASE(MatchExpression)
BOLT_GEN_CASE(BlockExpression)
BOLT_GEN_CASE(MemberExpression) BOLT_GEN_CASE(MemberExpression)
BOLT_GEN_CASE(TupleExpression) BOLT_GEN_CASE(TupleExpression)
BOLT_GEN_CASE(NestedExpression) BOLT_GEN_CASE(NestedExpression)
@ -192,6 +194,10 @@ protected:
static_cast<D*>(this)->visitToken(N); static_cast<D*>(this)->visitToken(N);
} }
// Default handler for DoKeyword: forwards N to visitToken on `this` cast to D*.
void visitDoKeyword(DoKeyword* N) {
  D* Self = static_cast<D*>(this);
  Self->visitToken(N);
}
void visitLParen(LParen* N) { void visitLParen(LParen* N) {
static_cast<D*>(this)->visitToken(N); static_cast<D*>(this)->visitToken(N);
} }
@ -452,6 +458,10 @@ protected:
static_cast<D*>(this)->visitExpression(N); static_cast<D*>(this)->visitExpression(N);
} }
// Default handler for BlockExpression: forwards N to visitExpression on `this`
// cast to D*.
void visitBlockExpression(BlockExpression* N) {
  D* Self = static_cast<D*>(this);
  Self->visitExpression(N);
}
void visitMemberExpression(MemberExpression* N) { void visitMemberExpression(MemberExpression* N) {
static_cast<D*>(this)->visitExpression(N); static_cast<D*>(this)->visitExpression(N);
} }
@ -606,6 +616,7 @@ public:
BOLT_GEN_CHILD_CASE(DotDot) BOLT_GEN_CHILD_CASE(DotDot)
BOLT_GEN_CHILD_CASE(Tilde) BOLT_GEN_CHILD_CASE(Tilde)
BOLT_GEN_CHILD_CASE(At) BOLT_GEN_CHILD_CASE(At)
BOLT_GEN_CHILD_CASE(DoKeyword)
BOLT_GEN_CHILD_CASE(LParen) BOLT_GEN_CHILD_CASE(LParen)
BOLT_GEN_CHILD_CASE(RParen) BOLT_GEN_CHILD_CASE(RParen)
BOLT_GEN_CHILD_CASE(LBracket) BOLT_GEN_CHILD_CASE(LBracket)
@ -666,6 +677,7 @@ public:
BOLT_GEN_CHILD_CASE(ReferenceExpression) BOLT_GEN_CHILD_CASE(ReferenceExpression)
BOLT_GEN_CHILD_CASE(MatchCase) BOLT_GEN_CHILD_CASE(MatchCase)
BOLT_GEN_CHILD_CASE(MatchExpression) BOLT_GEN_CHILD_CASE(MatchExpression)
BOLT_GEN_CHILD_CASE(BlockExpression)
BOLT_GEN_CHILD_CASE(MemberExpression) BOLT_GEN_CHILD_CASE(MemberExpression)
BOLT_GEN_CHILD_CASE(TupleExpression) BOLT_GEN_CHILD_CASE(TupleExpression)
BOLT_GEN_CHILD_CASE(NestedExpression) BOLT_GEN_CHILD_CASE(NestedExpression)
@ -723,6 +735,9 @@ public:
void visitEachChild(At* N) { void visitEachChild(At* N) {
} }
void visitEachChild(DoKeyword* N) {
}
void visitEachChild(LParen* N) { void visitEachChild(LParen* N) {
} }
@ -1051,6 +1066,17 @@ public:
} }
} }
// Visits the children of a BlockExpression in this order: every annotation,
// the DoKeyword, the BlockStart, and finally every element of the body.
void visitEachChild(BlockExpression* N) {
  for (auto* Annot : N->Annotations) {
    BOLT_VISIT(Annot);
  }
  BOLT_VISIT(N->DoKeyword);
  BOLT_VISIT(N->BlockStart);
  for (auto* Child : N->Elements) {
    BOLT_VISIT(Child);
  }
}
void visitEachChild(MemberExpression* N) { void visitEachChild(MemberExpression* N) {
for (auto A: N->Annotations) { for (auto A: N->Annotations) {
BOLT_VISIT(A); BOLT_VISIT(A);

View file

@ -50,6 +50,7 @@ const D* cast(const B* base) {
template<typename D, typename T> template<typename D, typename T>
bool isa(const T* value) { bool isa(const T* value) {
ZEN_ASSERT(value != nullptr);
return D::classof(value); return D::classof(value);
} }

View file

@ -68,6 +68,8 @@ class Punctuator : public BufferedStream<Token*> {
Stream<Token*>& Tokens; Stream<Token*>& Tokens;
bool ShouldStartBlock = false;
std::stack<FrameType> Frames; std::stack<FrameType> Frames;
std::stack<TextLoc> Locations; std::stack<TextLoc> Locations;

View file

@ -422,6 +422,17 @@ Token* MatchExpression::getLastToken() const {
return BlockStart; return BlockStart;
} }
// The first token of a block expression is its DoKeyword member.
Token* BlockExpression::getFirstToken() const {
  Token* First = DoKeyword;
  return First;
}
// Last token of a block expression: the BlockStart when the body has no
// elements, otherwise whatever the final element reports as its last token.
Token* BlockExpression::getLastToken() const {
  if (Elements.empty()) {
    return BlockStart;
  }
  Node* Final = Elements.back();
  return Final->getLastToken();
}
Token* RecordExpressionField::getFirstToken() const { Token* RecordExpressionField::getFirstToken() const {
return Name; return Name;
} }
@ -935,6 +946,10 @@ std::string At::getText() const {
return "@"; return "@";
} }
// Returns the source text of the `do` keyword. The scanner's keyword table
// maps the spelling "do" to NodeKind::DoKeyword, so that spelling is what this
// token must report (the previous "@" is the text of the At token).
std::string DoKeyword::getText() const {
  return "do";
}
std::string ClassKeyword::getText() const { std::string ClassKeyword::getText() const {
return "class"; return "class";
} }

View file

@ -1,6 +1,7 @@
// TODO check for memory leaks everywhere a nullptr is returned // TODO check for memory leaks everywhere a nullptr is returned
#include <sys/wait.h>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
@ -61,6 +62,11 @@ void OperatorTable::add(std::string Name, unsigned Flags, int Precedence) {
Mapping.emplace(Name, OperatorInfo { Precedence, Flags }); Mapping.emplace(Name, OperatorInfo { Precedence, Flags });
} }
// Calls unref() on every element of `nodes`, which may be any expression that
// can be iterated with a range-based for and whose elements support `->unref()`.
//
// The expansion is wrapped in do { ... } while (0) so that
// `BOLT_EACH_UNREF(xs);` is exactly one statement; a bare `for` followed by the
// caller's `;` would break an unbraced if/else. The argument is parenthesized
// so that any expression can be passed. Invocations must end with a semicolon.
#define BOLT_EACH_UNREF(nodes) \
  do { \
    for (auto BoltUnrefNode_: (nodes)) { \
      BoltUnrefNode_->unref(); \
    } \
  } while (0)
Parser::Parser(TextFile& File, Stream<Token*>& S, DiagnosticEngine& DE): Parser::Parser(TextFile& File, Stream<Token*>& S, DiagnosticEngine& DE):
File(File), Tokens(S), DE(DE) { File(File), Tokens(S), DE(DE) {
ExprOperators.add("**", OperatorFlags_InfixR, 10); ExprOperators.add("**", OperatorFlags_InfixR, 10);
@ -879,6 +885,38 @@ after_tuple_elements:
} }
case NodeKind::MatchKeyword: case NodeKind::MatchKeyword:
return parseMatchExpression(); return parseMatchExpression();
case NodeKind::DoKeyword:
{
  // Block expression: a DoKeyword token, then a BlockStart token, then body
  // elements up to a BlockEnd token.
  // NOTE(review): T0 and Annotations are declared outside this case; T0 is
  // assumed to be the already-peeked token that selected it — confirm.
  Tokens.get();
  auto T1 = expectToken(NodeKind::BlockStart);
  if (!T1) {
    BOLT_EACH_UNREF(Annotations);
    T0->unref();
    return nullptr;
  }
  std::vector<Node*> Elements;
  for (;;) {
    auto T2 = Tokens.peek();
    if (T2->getKind() == NodeKind::BlockEnd) {
      // The BlockEnd token is not stored in the resulting node, so it is
      // released as soon as it has been consumed.
      Tokens.get()->unref();
      break;
    }
    auto Element = parseLetBodyElement();
    if (Element == nullptr) {
      // Release everything collected so far before reporting failure.
      BOLT_EACH_UNREF(Annotations);
      T0->unref();
      T1->unref();
      BOLT_EACH_UNREF(Elements);
      return nullptr;
    }
    Elements.push_back(Element);
  }
  // Attach the annotations to the node. Every failure path above releases
  // them, so on success they must be handed over rather than dropped.
  return new BlockExpression {
    Annotations,
    static_cast<class DoKeyword*>(T0),
    static_cast<BlockStart*>(T1),
    Elements
  };
}
case NodeKind::IntegerLiteral: case NodeKind::IntegerLiteral:
case NodeKind::StringLiteral: case NodeKind::StringLiteral:
Tokens.get(); Tokens.get();
@ -1044,6 +1082,7 @@ ReturnStatement* Parser::parseReturnStatement() {
auto Annotations = parseAnnotations(); auto Annotations = parseAnnotations();
auto ReturnKeyword = expectToken<class ReturnKeyword>(); auto ReturnKeyword = expectToken<class ReturnKeyword>();
if (!ReturnKeyword) { if (!ReturnKeyword) {
BOLT_EACH_UNREF(Annotations);
return nullptr; return nullptr;
} }
Expression* Expression; Expression* Expression;
@ -1067,6 +1106,10 @@ IfStatement* Parser::parseIfStatement() {
std::vector<IfStatementPart*> Parts; std::vector<IfStatementPart*> Parts;
auto Annotations = parseAnnotations(); auto Annotations = parseAnnotations();
auto IfKeyword = expectToken<class IfKeyword>(); auto IfKeyword = expectToken<class IfKeyword>();
if (!IfKeyword) {
BOLT_EACH_UNREF(Annotations);
return nullptr;
}
auto Test = parseExpression(); auto Test = parseExpression();
if (!Test) { if (!Test) {
IfKeyword->unref(); IfKeyword->unref();

View file

@ -64,21 +64,22 @@ static int toDigit(Char Chr) {
} }
std::unordered_map<ByteString, NodeKind> Keywords = { std::unordered_map<ByteString, NodeKind> Keywords = {
{ "pub", NodeKind::PubKeyword },
{ "let", NodeKind::LetKeyword },
{ "foreign", NodeKind::ForeignKeyword },
{ "mut", NodeKind::MutKeyword },
{ "return", NodeKind::ReturnKeyword },
{ "type", NodeKind::TypeKeyword },
{ "mod", NodeKind::ModKeyword },
{ "if", NodeKind::IfKeyword },
{ "else", NodeKind::ElseKeyword },
{ "elif", NodeKind::ElifKeyword },
{ "match", NodeKind::MatchKeyword },
{ "class", NodeKind::ClassKeyword }, { "class", NodeKind::ClassKeyword },
{ "instance", NodeKind::InstanceKeyword }, { "do", NodeKind::DoKeyword },
{ "struct", NodeKind::StructKeyword }, { "elif", NodeKind::ElifKeyword },
{ "else", NodeKind::ElseKeyword },
{ "enum", NodeKind::EnumKeyword }, { "enum", NodeKind::EnumKeyword },
{ "foreign", NodeKind::ForeignKeyword },
{ "if", NodeKind::IfKeyword },
{ "instance", NodeKind::InstanceKeyword },
{ "let", NodeKind::LetKeyword },
{ "match", NodeKind::MatchKeyword },
{ "mod", NodeKind::ModKeyword },
{ "mut", NodeKind::MutKeyword },
{ "pub", NodeKind::PubKeyword },
{ "return", NodeKind::ReturnKeyword },
{ "struct", NodeKind::StructKeyword },
{ "type", NodeKind::TypeKeyword },
}; };
Scanner::Scanner(DiagnosticEngine& DE, TextFile& File, Stream<Char>& Chars): Scanner::Scanner(DiagnosticEngine& DE, TextFile& File, Stream<Char>& Chars):
@ -290,6 +291,8 @@ digit_finish:
return new StructKeyword(StartLoc); return new StructKeyword(StartLoc);
case NodeKind::EnumKeyword: case NodeKind::EnumKeyword:
return new EnumKeyword(StartLoc); return new EnumKeyword(StartLoc);
case NodeKind::DoKeyword:
return new DoKeyword(StartLoc);
default: default:
ZEN_UNREACHABLE ZEN_UNREACHABLE
} }
@ -432,6 +435,12 @@ Token* Punctuator::read() {
auto T0 = Tokens.peek(); auto T0 = Tokens.peek();
if (ShouldStartBlock) {
ShouldStartBlock = false;
Frames.push(FrameType::Block);
return new BlockStart { T0->getStartLoc() };
}
switch (T0->getKind()) { switch (T0->getKind()) {
case NodeKind::LBrace: case NodeKind::LBrace:
Frames.push(FrameType::Fallthrough); Frames.push(FrameType::Fallthrough);
@ -482,6 +491,9 @@ Token* Punctuator::read() {
return new BlockStart(T0->getStartLoc()); return new BlockStart(T0->getStartLoc());
} }
} }
if (isa<DoKeyword>(T0)) {
ShouldStartBlock = true;
}
return Tokens.get(); return Tokens.get();
} }
case FrameType::Block: case FrameType::Block: