Enable parsing of block expressions

Part of fixes for issue #65
This commit is contained in:
Sam Vervaeck 2024-07-10 12:03:24 +02:00
parent 9af655c072
commit 449991d0c9
Signed by: samvv
SSH key fingerprint: SHA256:dIg0ywU1OP+ZYifrYxy8c5esO72cIKB+4/9wkZj1VaY
7 changed files with 181 additions and 25 deletions

View file

@ -1,6 +1,7 @@
#ifndef BOLT_CST_HPP
#define BOLT_CST_HPP
#include <stdint.h>
#include <cmath>
#include <cstdlib>
#include <unordered_map>
@ -28,8 +29,8 @@ class Statement;
class TextLoc {
public:
size_t Line = 1;
size_t Column = 1;
std::size_t Line = 1;
std::size_t Column = 1;
inline bool isEmpty() const noexcept {
return Line == 0 && Column == 0;
@ -68,18 +69,18 @@ class TextFile {
ByteString Path;
ByteString Text;
std::vector<size_t> LineOffsets;
std::vector<std::size_t> LineOffsets;
public:
TextFile(ByteString Path, ByteString Text);
size_t getLine(size_t Offset) const;
size_t getColumn(size_t Offset) const;
size_t getStartOffsetOfLine(size_t Line) const;
size_t getEndOffsetOfLine(size_t Line) const;
std::size_t getLine(std::size_t Offset) const;
std::size_t getColumn(std::size_t Offset) const;
std::size_t getStartOffsetOfLine(std::size_t Line) const;
std::size_t getEndOffsetOfLine(std::size_t Line) const;
size_t getLineCount() const;
std::size_t getLineCount() const;
ByteString getPath() const;
@ -96,6 +97,7 @@ enum class NodeKind {
DotDot,
Tilde,
At,
DoKeyword,
LParen,
RParen,
LBracket,
@ -156,6 +158,7 @@ enum class NodeKind {
ReferenceExpression,
MatchCase,
MatchExpression,
BlockExpression,
MemberExpression,
TupleExpression,
NestedExpression,
@ -347,19 +350,19 @@ public:
TextLoc getEndLoc() const;
inline size_t getStartLine() const override {
inline std::size_t getStartLine() const override {
return StartLoc.Line;
}
inline size_t getStartColumn() const override {
inline std::size_t getStartColumn() const override {
return StartLoc.Column;
}
inline size_t getEndLine() const override {
inline std::size_t getEndLine() const override {
return getEndLoc().Line;
}
inline size_t getEndColumn() const override {
inline std::size_t getEndColumn() const override {
return getEndLoc().Column;
}
@ -481,6 +484,20 @@ public:
};
// Token node whose kind is NodeKind::DoKeyword.
class DoKeyword : public Token {
public:

  // Build the token; StartLoc is handed on to the Token base together with
  // the fixed node kind.
  DoKeyword(TextLoc StartLoc)
    : Token(NodeKind::DoKeyword, StartLoc) {}

  // Text of this token; the definition lives out of line.
  std::string getText() const override;

  // Kind test: true exactly when N reports NodeKind::DoKeyword.
  static bool classof(const Node* N) {
    const auto Kind = N->getKind();
    return Kind == NodeKind::DoKeyword;
  }

};
class LParen : public Token {
public:
@ -1193,7 +1210,15 @@ public:
// Holder for a list of annotation node pointers.
class AnnotationContainer {
public:

  // The stored annotations; empty after default construction.
  std::vector<Annotation*> Annotations;

  // Start out without any annotations.
  AnnotationContainer()
    : Annotations() {}

  // Start out with a copy of the given list.
  AnnotationContainer(std::vector<Annotation*> Initial)
    : Annotations(Initial) {}

};
class ExpressionAnnotation : public Annotation {
@ -1764,6 +1789,7 @@ public:
|| N->getKind() == NodeKind::InfixExpression
|| N->getKind() == NodeKind::RecordExpression
|| N->getKind() == NodeKind::MatchExpression
|| N->getKind() == NodeKind::BlockExpression
|| N->getKind() == NodeKind::MemberExpression
|| N->getKind() == NodeKind::LiteralExpression
|| N->getKind() == NodeKind::PrefixExpression;
@ -1871,6 +1897,37 @@ public:
};
class BlockExpression : public Expression {
public:
class DoKeyword* DoKeyword;
class BlockStart* BlockStart;
std::vector<Node*> Elements;
inline BlockExpression(
class DoKeyword* DoKeyword,
class BlockStart* BlockStart,
std::vector<Node*> Elements
): Expression(NodeKind::BlockExpression),
DoKeyword(DoKeyword),
BlockStart(BlockStart),
Elements(Elements) {}
inline BlockExpression(
std::vector<Annotation*> Annotations,
class DoKeyword* DoKeyword,
class BlockStart* BlockStart,
std::vector<Node*> Elements
): Expression(NodeKind::BlockExpression, Annotations),
DoKeyword(DoKeyword),
BlockStart(BlockStart),
Elements(Elements) {}
Token* getFirstToken() const override;
Token* getLastToken() const override;
};
class MemberExpression : public Expression {
public:

View file

@ -28,6 +28,7 @@ public:
BOLT_GEN_CASE(DotDot)
BOLT_GEN_CASE(Tilde)
BOLT_GEN_CASE(At)
BOLT_GEN_CASE(DoKeyword)
BOLT_GEN_CASE(LParen)
BOLT_GEN_CASE(RParen)
BOLT_GEN_CASE(LBracket)
@ -88,6 +89,7 @@ public:
BOLT_GEN_CASE(ReferenceExpression)
BOLT_GEN_CASE(MatchCase)
BOLT_GEN_CASE(MatchExpression)
BOLT_GEN_CASE(BlockExpression)
BOLT_GEN_CASE(MemberExpression)
BOLT_GEN_CASE(TupleExpression)
BOLT_GEN_CASE(NestedExpression)
@ -192,6 +194,10 @@ protected:
static_cast<D*>(this)->visitToken(N);
}
// Handler for DoKeyword nodes: hands N to visitToken, looked up through D.
void visitDoKeyword(DoKeyword* N) {
  D* Self = static_cast<D*>(this);
  Self->visitToken(N);
}
// Handler for LParen nodes: hands N to visitToken, looked up through D.
void visitLParen(LParen* N) {
  D* Self = static_cast<D*>(this);
  Self->visitToken(N);
}
@ -452,6 +458,10 @@ protected:
static_cast<D*>(this)->visitExpression(N);
}
// Handler for BlockExpression nodes: hands N to visitExpression, looked up
// through D.
void visitBlockExpression(BlockExpression* N) {
  D* Self = static_cast<D*>(this);
  Self->visitExpression(N);
}
// Handler for MemberExpression nodes: hands N to visitExpression, looked up
// through D.
void visitMemberExpression(MemberExpression* N) {
  D* Self = static_cast<D*>(this);
  Self->visitExpression(N);
}
@ -606,6 +616,7 @@ public:
BOLT_GEN_CHILD_CASE(DotDot)
BOLT_GEN_CHILD_CASE(Tilde)
BOLT_GEN_CHILD_CASE(At)
BOLT_GEN_CHILD_CASE(DoKeyword)
BOLT_GEN_CHILD_CASE(LParen)
BOLT_GEN_CHILD_CASE(RParen)
BOLT_GEN_CHILD_CASE(LBracket)
@ -666,6 +677,7 @@ public:
BOLT_GEN_CHILD_CASE(ReferenceExpression)
BOLT_GEN_CHILD_CASE(MatchCase)
BOLT_GEN_CHILD_CASE(MatchExpression)
BOLT_GEN_CHILD_CASE(BlockExpression)
BOLT_GEN_CHILD_CASE(MemberExpression)
BOLT_GEN_CHILD_CASE(TupleExpression)
BOLT_GEN_CHILD_CASE(NestedExpression)
@ -723,6 +735,9 @@ public:
// Child traversal for At nodes: the body is empty, so nothing is visited.
void visitEachChild(At* N) {
}
// Child traversal for DoKeyword nodes: the body is empty, so nothing is
// visited.
void visitEachChild(DoKeyword* N) {
}
// Child traversal for LParen nodes: the body is empty, so nothing is visited.
void visitEachChild(LParen* N) {
}
@ -1051,6 +1066,17 @@ public:
}
}
// Child traversal for BlockExpression nodes. Visit order: annotations, the
// DoKeyword token, the BlockStart token, then every element in list order.
void visitEachChild(BlockExpression* N) {
  // Braces stay around each BOLT_VISIT use; the macro's expansion is not
  // visible from here.
  for (auto* Annot: N->Annotations) {
    BOLT_VISIT(Annot);
  }

  BOLT_VISIT(N->DoKeyword);
  BOLT_VISIT(N->BlockStart);

  for (auto* Child: N->Elements) {
    BOLT_VISIT(Child);
  }
}
void visitEachChild(MemberExpression* N) {
for (auto A: N->Annotations) {
BOLT_VISIT(A);

View file

@ -50,6 +50,7 @@ const D* cast(const B* base) {
// Ask D::classof whether the node pointed to by value counts as a D.
// value is checked against nullptr with ZEN_ASSERT before it is used.
template<typename D, typename T>
bool isa(const T* value) {
  ZEN_ASSERT(value != nullptr);
  const bool Matches = D::classof(value);
  return Matches;
}

View file

@ -68,6 +68,8 @@ class Punctuator : public BufferedStream<Token*> {
Stream<Token*>& Tokens;
bool ShouldStartBlock = false;
std::stack<FrameType> Frames;
std::stack<TextLoc> Locations;

View file

@ -422,6 +422,17 @@ Token* MatchExpression::getLastToken() const {
return BlockStart;
}
// The first token of a block expression is its DoKeyword member.
Token* BlockExpression::getFirstToken() const {
  Token* First = this->DoKeyword;
  return First;
}
// The last token is the last token of the final element, or the BlockStart
// member when there are no elements.
Token* BlockExpression::getLastToken() const {
  if (Elements.empty()) {
    return BlockStart;
  }
  Node* Final = Elements.back();
  return Final->getLastToken();
}
// The first token of a record expression field is its Name member.
Token* RecordExpressionField::getFirstToken() const {
  return this->Name;
}
@ -935,6 +946,10 @@ std::string At::getText() const {
return "@";
}
// Source text of the `do` keyword. It must match the spelling under which
// the scanner's keyword table registers NodeKind::DoKeyword ("do"); the
// previous "@" was the text of the At token.
std::string DoKeyword::getText() const {
  return "do";
}
// Text of the ClassKeyword token.
std::string ClassKeyword::getText() const {
  return std::string("class");
}

View file

@ -1,6 +1,7 @@
// TODO check for memory leaks everywhere a nullptr is returned
#include <sys/wait.h>
#include <tuple>
#include <vector>
@ -61,6 +62,11 @@ void OperatorTable::add(std::string Name, unsigned Flags, int Precedence) {
Mapping.emplace(Name, OperatorInfo { Precedence, Flags });
}
// Call unref() on every element of `nodes`, in iteration order.
//
// `nodes` is any range usable in a range-based for loop whose elements
// support `->unref()`.
//
// The expansion is wrapped in do { ... } while (0) so that an invocation
// followed by a semicolon forms exactly one statement and can be used as the
// body of an unbraced if/else. Always write the trailing semicolon. The
// argument is parenthesised and the loop variable has a macro-specific name
// so it cannot clash with names used at the call site.
#define BOLT_EACH_UNREF(nodes) \
  do { \
    for (auto BoltUnrefNode_: (nodes)) { \
      BoltUnrefNode_->unref(); \
    } \
  } while (0)
Parser::Parser(TextFile& File, Stream<Token*>& S, DiagnosticEngine& DE):
File(File), Tokens(S), DE(DE) {
ExprOperators.add("**", OperatorFlags_InfixR, 10);
@ -879,6 +885,38 @@ after_tuple_elements:
}
case NodeKind::MatchKeyword:
return parseMatchExpression();
case NodeKind::DoKeyword:
{
  // Block expression: DoKeyword, BlockStart, zero or more elements, BlockEnd.
  // Consume the keyword; the code below keeps referring to it through T0
  // (presumably the peeked token this switch dispatches on — confirm).
  Tokens.get();
  auto T1 = expectToken(NodeKind::BlockStart);
  if (!T1) {
    BOLT_EACH_UNREF(Annotations);
    T0->unref();
    return nullptr;
  }
  std::vector<Node*> Elements;
  for (;;) {
    auto T2 = Tokens.peek();
    if (T2->getKind() == NodeKind::BlockEnd) {
      // The BlockEnd token is not stored in the node, so release it here.
      Tokens.get()->unref();
      break;
    }
    auto Element = parseLetBodyElement();
    if (Element == nullptr) {
      // Release everything gathered so far before reporting failure.
      BOLT_EACH_UNREF(Annotations);
      T0->unref();
      T1->unref();
      BOLT_EACH_UNREF(Elements);
      return nullptr;
    }
    Elements.push_back(Element);
  }
  // Hand the annotations to the node. Previously they were dropped on the
  // success path: never attached to the expression and never unref'ed,
  // unlike on every error path above.
  return new BlockExpression {
    Annotations,
    static_cast<class DoKeyword*>(T0),
    static_cast<BlockStart*>(T1),
    Elements
  };
}
case NodeKind::IntegerLiteral:
case NodeKind::StringLiteral:
Tokens.get();
@ -1044,6 +1082,7 @@ ReturnStatement* Parser::parseReturnStatement() {
auto Annotations = parseAnnotations();
auto ReturnKeyword = expectToken<class ReturnKeyword>();
if (!ReturnKeyword) {
BOLT_EACH_UNREF(Annotations);
return nullptr;
}
Expression* Expression;
@ -1067,6 +1106,10 @@ IfStatement* Parser::parseIfStatement() {
std::vector<IfStatementPart*> Parts;
auto Annotations = parseAnnotations();
auto IfKeyword = expectToken<class IfKeyword>();
if (!IfKeyword) {
BOLT_EACH_UNREF(Annotations);
return nullptr;
}
auto Test = parseExpression();
if (!Test) {
IfKeyword->unref();

View file

@ -64,21 +64,22 @@ static int toDigit(Char Chr) {
}
// Maps each keyword spelling to its NodeKind. Entries are kept in
// alphabetical order and each keyword is listed exactly once; a repeated key
// in the initializer list would be ignored by std::unordered_map without any
// diagnostic.
std::unordered_map<ByteString, NodeKind> Keywords = {
  { "class", NodeKind::ClassKeyword },
  { "do", NodeKind::DoKeyword },
  { "elif", NodeKind::ElifKeyword },
  { "else", NodeKind::ElseKeyword },
  { "enum", NodeKind::EnumKeyword },
  { "foreign", NodeKind::ForeignKeyword },
  { "if", NodeKind::IfKeyword },
  { "instance", NodeKind::InstanceKeyword },
  { "let", NodeKind::LetKeyword },
  { "match", NodeKind::MatchKeyword },
  { "mod", NodeKind::ModKeyword },
  { "mut", NodeKind::MutKeyword },
  { "pub", NodeKind::PubKeyword },
  { "return", NodeKind::ReturnKeyword },
  { "struct", NodeKind::StructKeyword },
  { "type", NodeKind::TypeKeyword },
};
Scanner::Scanner(DiagnosticEngine& DE, TextFile& File, Stream<Char>& Chars):
@ -290,6 +291,8 @@ digit_finish:
return new StructKeyword(StartLoc);
case NodeKind::EnumKeyword:
return new EnumKeyword(StartLoc);
case NodeKind::DoKeyword:
return new DoKeyword(StartLoc);
default:
ZEN_UNREACHABLE
}
@ -432,6 +435,12 @@ Token* Punctuator::read() {
auto T0 = Tokens.peek();
if (ShouldStartBlock) {
ShouldStartBlock = false;
Frames.push(FrameType::Block);
return new BlockStart { T0->getStartLoc() };
}
switch (T0->getKind()) {
case NodeKind::LBrace:
Frames.push(FrameType::Fallthrough);
@ -482,6 +491,9 @@ Token* Punctuator::read() {
return new BlockStart(T0->getStartLoc());
}
}
if (isa<DoKeyword>(T0)) {
ShouldStartBlock = true;
}
return Tokens.get();
}
case FrameType::Block: