Create a working scanner/parser for a subset of the language

This commit is contained in:
Sam Vervaeck 2022-08-19 19:52:57 +02:00
commit fee5085497
18 changed files with 2147 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/build/
.cache/

17
.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,17 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "lldb",
"request": "launch",
"name": "Debug",
"program": "${workspaceFolder}/build/bolt",
"args": ["test.bolt"],
"cwd": "${workspaceFolder}",
"preLaunchTask": "CMake: build"
}
]
}

32
.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,32 @@
{
"files.associations": {
"*.tcc": "cpp",
"fstream": "cpp",
"iosfwd": "cpp",
"istream": "cpp",
"limits": "cpp",
"sstream": "cpp",
"streambuf": "cpp",
"typeinfo": "cpp",
"cstdlib": "cpp",
"array": "cpp",
"chrono": "cpp",
"cmath": "cpp",
"deque": "cpp",
"forward_list": "cpp",
"list": "cpp",
"string": "cpp",
"unordered_map": "cpp",
"unordered_set": "cpp",
"vector": "cpp",
"exception": "cpp",
"functional": "cpp",
"string_view": "cpp",
"memory": "cpp",
"random": "cpp",
"initializer_list": "cpp",
"numeric": "cpp",
"ostream": "cpp",
"system_error": "cpp"
}
}

16
.vscode/tasks.json vendored Normal file
View file

@ -0,0 +1,16 @@
{
"version": "2.0.0",
"tasks": [
{
"type": "cmake",
"label": "CMake: build",
"command": "build",
"targets": [
"all"
],
"group": "build",
"problemMatcher": [],
"detail": "CMake template build task"
}
]
}

41
CMakeLists.txt Normal file
View file

@ -0,0 +1,41 @@
cmake_minimum_required(VERSION 3.10)

project(Bolt CXX)

# The sources rely on C++17 features (e.g. class template argument deduction
# for std::vector), so refuse to silently fall back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_subdirectory(deps/zen EXCLUDE_FROM_ALL)

add_executable(
  bolt
  src/CST.cc
  src/Diagnostics.cc
  src/Scanner.cc
  src/Parser.cc
  src/main.cc
)

# -fstandalone-debug is a Clang-specific flag; other compilers reject it, so
# only pass it when a Clang-family compiler is in use.
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
  target_compile_options(
    bolt
    PUBLIC
      -fstandalone-debug
  )
endif()

target_include_directories(
  bolt
  PUBLIC
    include
)

target_link_libraries(
  bolt
  PUBLIC
    zen
)

# Regenerate the CST header/source pair from its specification whenever the
# specification or the generator script changes.
add_custom_command(
  OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/include/bolt/CST.hpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/CST.cc"
  COMMAND scripts/gennodes.py --name=CST ./bolt-cst-spec.txt -Iinclude/ --include-root=bolt --source-root=src/ --namespace=bolt
  DEPENDS scripts/gennodes.py
  MAIN_DEPENDENCY "${CMAKE_CURRENT_SOURCE_DIR}/bolt-cst-spec.txt"
  WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
)

View file

@ -0,0 +1,15 @@
// Byte-oriented string aliases used for token text.
//
// The include guard is named after these aliases so that it cannot clash
// with a guard belonging to the separate code-point String header.
#ifndef BOLT_BYTE_STRING_HPP
#define BOLT_BYTE_STRING_HPP

#include <string>
#include <string_view>

namespace bolt {

  // Owning string of bytes.
  using ByteString = std::string;

  // Non-owning view over a sequence of bytes.
  using ByteStringView = std::string_view;

}

#endif // of #ifndef BOLT_BYTE_STRING_HPP

734
include/bolt/CST.hpp Normal file
View file

@ -0,0 +1,734 @@
#ifndef BOLT_CST_HPP
#define BOLT_CST_HPP
#include <vector>
#include "bolt/Text.hpp"
#include "bolt/Integer.hpp"
#include "bolt/ByteString.hpp"
namespace bolt {
// Discriminator stored in every Node; one enumerator per concrete token or
// syntax-tree class. The order of the enumerators determines their values.
enum class NodeType {

  // Punctuation tokens
  Equals,
  Colon,
  Dot,
  DotDot,
  LParen,
  RParen,
  LBracket,
  RBracket,
  LBrace,
  RBrace,

  // Keyword tokens
  LetKeyword,
  MutKeyword,
  PubKeyword,
  TypeKeyword,
  ReturnKeyword,
  ModKeyword,
  StructKeyword,

  // Tokens without a fixed spelling in the source text
  Invalid,
  EndOfFile,
  BlockStart,
  BlockEnd,
  LineFoldEnd,

  // Tokens that carry a value
  CustomOperator,
  Identifier,
  StringLiteral,
  IntegerLiteral,

  // Syntax-tree nodes
  QualifiedName,
  ReferenceTypeExpression,
  BindPattern,
  ReferenceExpression,
  ConstantExpression,
  CallExpression,
  ExpressionStatement,
  ReturnStatement,
  TypeAssert,
  Param,
  LetBlockBody,
  LetExprBody,
  LetDeclaration,
  StructDeclField,
  StructDecl,
  SourceFile,

};
// Base class of every token and syntax-tree node.
//
// Nodes carry an intrusive reference count that starts at zero. Owners that
// want to share a node call ref(); every owner calls unref() when it lets go.
class Node {

  unsigned refcount = 0;

public:

  // Register one additional owner of this node.
  inline void ref() {
    ++refcount;
  }

  // Release one owner and destroy the node when no owner remains.
  //
  // A node that was never ref()'d still has a count of zero. Decrementing
  // that would wrap the unsigned counter around and the node would never be
  // freed, so a count of zero is treated the same as a count of one: this
  // call releases the last owner.
  inline void unref() {
    if (refcount <= 1) {
      delete this;
      return;
    }
    --refcount;
  }

  // The concrete kind of this node; fixed at construction.
  const NodeType Type;

  inline Node(NodeType Type):
    Type(Type) {}

  virtual ~Node();

};
// Base class of all tokens: a Node that remembers where it starts in the
// source text and can reproduce its own spelling.
class Token : public Node {

  TextLoc StartLoc;

public:

  Token(NodeType Type, TextLoc StartLoc): Node(Type), StartLoc(StartLoc) {}

  // The text this token stands for, as reported by the concrete token class.
  virtual std::string getText() const = 0;

  // Location of the first character of the token.
  inline TextLoc getStartLoc() const {
    return StartLoc;
  }

  // Location just past the token.
  //
  // Computed by advancing from the start location over getText(); starting
  // from a default-constructed location instead would always report a
  // position relative to line 1, column 1. The result is only as precise as
  // the text returned by getText().
  inline TextLoc getEndLoc() const {
    TextLoc EndLoc = StartLoc;
    EndLoc.advance(getText());
    return EndLoc;
  }

  inline size_t getStartLine() const {
    return StartLoc.Line;
  }

  inline size_t getStartColumn() const {
    return StartLoc.Column;
  }

  inline size_t getEndLine() const {
    return getEndLoc().Line;
  }

  inline size_t getEndColumn() const {
    return getEndLoc().Column;
  }

  ~Token();

};
class Equals : public Token {
public:
Equals(TextLoc StartLoc): Token(NodeType::Equals, StartLoc) {}
std::string getText() const override;
~Equals();
};
class Colon : public Token {
public:
Colon(TextLoc StartLoc): Token(NodeType::Colon, StartLoc) {}
std::string getText() const override;
~Colon();
};
class Dot : public Token {
public:
Dot(TextLoc StartLoc): Token(NodeType::Dot, StartLoc) {}
std::string getText() const override;
~Dot();
};
class DotDot : public Token {
public:
DotDot(TextLoc StartLoc): Token(NodeType::DotDot, StartLoc) {}
std::string getText() const override;
~DotDot();
};
class LParen : public Token {
public:
LParen(TextLoc StartLoc): Token(NodeType::LParen, StartLoc) {}
std::string getText() const override;
~LParen();
};
class RParen : public Token {
public:
RParen(TextLoc StartLoc): Token(NodeType::RParen, StartLoc) {}
std::string getText() const override;
~RParen();
};
class LBracket : public Token {
public:
LBracket(TextLoc StartLoc): Token(NodeType::LBracket, StartLoc) {}
std::string getText() const override;
~LBracket();
};
class RBracket : public Token {
public:
RBracket(TextLoc StartLoc): Token(NodeType::RBracket, StartLoc) {}
std::string getText() const override;
~RBracket();
};
class LBrace : public Token {
public:
LBrace(TextLoc StartLoc): Token(NodeType::LBrace, StartLoc) {}
std::string getText() const override;
~LBrace();
};
class RBrace : public Token {
public:
RBrace(TextLoc StartLoc): Token(NodeType::RBrace, StartLoc) {}
std::string getText() const override;
~RBrace();
};
class LetKeyword : public Token {
public:
LetKeyword(TextLoc StartLoc): Token(NodeType::LetKeyword, StartLoc) {}
std::string getText() const override;
~LetKeyword();
};
class MutKeyword : public Token {
public:
MutKeyword(TextLoc StartLoc): Token(NodeType::MutKeyword, StartLoc) {}
std::string getText() const override;
~MutKeyword();
};
class PubKeyword : public Token {
public:
PubKeyword(TextLoc StartLoc): Token(NodeType::PubKeyword, StartLoc) {}
std::string getText() const override;
~PubKeyword();
};
class TypeKeyword : public Token {
public:
TypeKeyword(TextLoc StartLoc): Token(NodeType::TypeKeyword, StartLoc) {}
std::string getText() const override;
~TypeKeyword();
};
class ReturnKeyword : public Token {
public:
ReturnKeyword(TextLoc StartLoc): Token(NodeType::ReturnKeyword, StartLoc) {}
std::string getText() const override;
~ReturnKeyword();
};
class ModKeyword : public Token {
public:
ModKeyword(TextLoc StartLoc): Token(NodeType::ModKeyword, StartLoc) {}
std::string getText() const override;
~ModKeyword();
};
class StructKeyword : public Token {
public:
StructKeyword(TextLoc StartLoc): Token(NodeType::StructKeyword, StartLoc) {}
std::string getText() const override;
~StructKeyword();
};
class Invalid : public Token {
public:
Invalid(TextLoc StartLoc): Token(NodeType::Invalid, StartLoc) {}
std::string getText() const override;
~Invalid();
};
class EndOfFile : public Token {
public:
EndOfFile(TextLoc StartLoc): Token(NodeType::EndOfFile, StartLoc) {}
std::string getText() const override;
~EndOfFile();
};
class BlockStart : public Token {
public:
BlockStart(TextLoc StartLoc): Token(NodeType::BlockStart, StartLoc) {}
std::string getText() const override;
~BlockStart();
};
class BlockEnd : public Token {
public:
BlockEnd(TextLoc StartLoc): Token(NodeType::BlockEnd, StartLoc) {}
std::string getText() const override;
~BlockEnd();
};
class LineFoldEnd : public Token {
public:
LineFoldEnd(TextLoc StartLoc): Token(NodeType::LineFoldEnd, StartLoc) {}
std::string getText() const override;
~LineFoldEnd();
};
class CustomOperator : public Token {
public:
ByteString Text;
CustomOperator(ByteString Text, TextLoc StartLoc): Token(NodeType::CustomOperator, StartLoc), Text(Text) {}
std::string getText() const override;
~CustomOperator();
};
class Identifier : public Token {
public:
ByteString Text;
Identifier(ByteString Text, TextLoc StartLoc): Token(NodeType::Identifier, StartLoc), Text(Text) {}
std::string getText() const override;
~Identifier();
};
class StringLiteral : public Token {
public:
ByteString Text;
StringLiteral(ByteString Text, TextLoc StartLoc): Token(NodeType::StringLiteral, StartLoc), Text(Text) {}
std::string getText() const override;
~StringLiteral();
};
class IntegerLiteral : public Token {
public:
Integer Value;
IntegerLiteral(Integer Value, TextLoc StartLoc): Token(NodeType::IntegerLiteral, StartLoc), Value(Value) {}
std::string getText() const override;
~IntegerLiteral();
};
class QualifiedName : public Node {
public:
std::vector<Identifier*> ModulePath;
Identifier* Name;
QualifiedName(
std::vector<Identifier*> ModulePath,
Identifier* Name
): Node(NodeType::QualifiedName),
ModulePath(ModulePath),
Name(Name) {}
~QualifiedName();
};
class SourceElement : public Node {
public:
SourceElement(NodeType Type): Node(Type) {}
~SourceElement();
};
class LetBodyElement : public Node {
public:
LetBodyElement(NodeType Type): Node(Type) {}
~LetBodyElement();
};
class TypeExpression : public Node {
public:
TypeExpression(NodeType Type): Node(Type) {}
~TypeExpression();
};
class ReferenceTypeExpression : public TypeExpression {
public:
QualifiedName* Name;
ReferenceTypeExpression(
QualifiedName* Name
): TypeExpression(NodeType::ReferenceTypeExpression),
Name(Name) {}
~ReferenceTypeExpression();
};
class Pattern : public Node {
public:
Pattern(NodeType Type): Node(Type) {}
~Pattern();
};
class BindPattern : public Pattern {
public:
Identifier* Name;
BindPattern(
Identifier* Name
): Pattern(NodeType::BindPattern),
Name(Name) {}
~BindPattern();
};
class Expression : public Node {
public:
Expression(NodeType Type): Node(Type) {}
~Expression();
};
class ReferenceExpression : public Expression {
public:
Identifier* Name;
ReferenceExpression(
Identifier* Name
): Expression(NodeType::ReferenceExpression),
Name(Name) {}
~ReferenceExpression();
};
class ConstantExpression : public Expression {
public:
Token* Token;
ConstantExpression(
class Token* Token
): Expression(NodeType::ConstantExpression),
Token(Token) {}
~ConstantExpression();
};
class CallExpression : public Expression {
public:
CallExpression(Expression* Function, std::vector<Expression*> Args): Expression(NodeType::CallExpression), Function(Function), Args(Args) {}
Expression* Function;
std::vector<Expression*> Args;
~CallExpression();
};
class Statement : public LetBodyElement {
public:
Statement(NodeType Type): LetBodyElement(Type) {}
~Statement();
};
class ExpressionStatement : public Statement, public SourceElement {
public:
ExpressionStatement(Expression* Expression): Statement(NodeType::ExpressionStatement), SourceElement(NodeType::ExpressionStatement), Expression(Expression) {}
Expression* Expression;
~ExpressionStatement();
};
class ReturnStatement : public Statement {
public:
ReturnStatement(ReturnKeyword* ReturnKeyword, Expression* Expression): Statement(NodeType::ReturnStatement), ReturnKeyword(ReturnKeyword), Expression(Expression) {}
ReturnKeyword* ReturnKeyword;
Expression* Expression;
~ReturnStatement();
};
class TypeAssert : public Node {
public:
TypeAssert(Colon* Colon, TypeExpression* TypeExpression): Node(NodeType::TypeAssert), Colon(Colon), TypeExpression(TypeExpression) {}
Colon* Colon;
TypeExpression* TypeExpression;
~TypeAssert();
};
class Param : public Node {
public:
Param(Pattern* Pattern, TypeAssert* TypeAssert): Node(NodeType::Param), Pattern(Pattern), TypeAssert(TypeAssert) {}
Pattern* Pattern;
TypeAssert* TypeAssert;
~Param();
};
class LetBody : public Node {
public:
LetBody(NodeType Type): Node(Type) {}
~LetBody();
};
class LetBlockBody : public LetBody {
public:
LetBlockBody(BlockStart* BlockStart, std::vector<LetBodyElement*> Elements): LetBody(NodeType::LetBlockBody), BlockStart(BlockStart), Elements(Elements) {}
BlockStart* BlockStart;
std::vector<LetBodyElement*> Elements;
~LetBlockBody();
};
class LetExprBody : public LetBody {
public:
Equals* Equals;
Expression* Expression;
LetExprBody(
class Equals* Equals,
class Expression* Expression
): LetBody(NodeType::LetExprBody),
Equals(Equals),
Expression(Expression) {}
~LetExprBody();
};
class LetDeclaration : public SourceElement, public LetBodyElement {
public:
PubKeyword* PubKeyword;
LetKeyword* LetKeywod;
MutKeyword* MutKeyword;
Pattern* Pattern;
std::vector<Param*> Params;
TypeAssert* TypeAssert;
LetBody* Body;
LetDeclaration(
class PubKeyword* PubKeyword,
class LetKeyword* LetKeywod,
class MutKeyword* MutKeyword,
class Pattern* Pattern,
std::vector<Param*> Params,
class TypeAssert* TypeAssert,
LetBody* Body
): SourceElement(NodeType::LetDeclaration),
LetBodyElement(NodeType::LetDeclaration),
PubKeyword(PubKeyword),
LetKeywod(LetKeywod),
MutKeyword(MutKeyword),
Pattern(Pattern),
Params(Params),
TypeAssert(TypeAssert),
Body(Body) {}
~LetDeclaration();
};
class StructDeclField : public Node {
public:
StructDeclField(
Identifier* Name,
Colon* Colon,
TypeExpression* TypeExpression
): Node(NodeType::StructDeclField),
Name(Name),
Colon(Colon),
TypeExpression(TypeExpression) {}
Identifier* Name;
Colon* Colon;
TypeExpression* TypeExpression;
~StructDeclField();
};
class StructDecl : public SourceElement {
public:
StructDecl(
StructKeyword* StructKeyword,
Identifier* Name,
Dot* Dot,
std::vector<StructDeclField*> Fields
): SourceElement(NodeType::StructDecl),
StructKeyword(StructKeyword),
Name(Name),
Dot(Dot),
Fields(Fields) {}
StructKeyword* StructKeyword;
Identifier* Name;
Dot* Dot;
std::vector<StructDeclField*> Fields;
~StructDecl();
};
class SourceFile : public Node {
public:
SourceFile(std::vector<SourceElement*> Elements): Node(NodeType::SourceFile), Elements(Elements) {}
std::vector<SourceElement*> Elements;
~SourceFile();
};
}
#endif

View file

@ -0,0 +1,39 @@
#pragma once
#include <vector>
#include <stdexcept>
#include "bolt/String.hpp"
#include "bolt/CST.hpp"
namespace bolt {
// Base class of all diagnostics thrown by the scanner and the parser.
//
// The base is inherited publicly: with the default (private) inheritance of
// a 'class', a thrown Diagnostic could not be caught through a handler for
// std::runtime_error or std::exception.
class Diagnostic : public std::runtime_error {
public:
  Diagnostic();
};
// Diagnostic carrying the token that was found and the node types that
// would have been accepted in its place.
class UnexpectedTokenDiagnostic : public Diagnostic {
public:

  Token* Actual;
  std::vector<NodeType> Expected;

  UnexpectedTokenDiagnostic(Token* Actual, std::vector<NodeType> Expected):
    Actual(Actual),
    Expected(Expected) {}

};
// Diagnostic carrying a source location and the offending text found there.
class UnexpectedStringDiagnostic : public Diagnostic {
public:

  TextLoc Location;
  String Actual;

  UnexpectedStringDiagnostic(TextLoc Location, String Actual):
    Location(Location),
    Actual(Actual) {}

};
}

10
include/bolt/Integer.hpp Normal file
View file

@ -0,0 +1,10 @@
#ifndef BOLT_INTEGER_HPP
#define BOLT_INTEGER_HPP
namespace bolt {

  // Type used to hold the value of an integer literal.
  using Integer = long long int;

}
#endif // of #ifndef BOLT_INTEGER_HPP

45
include/bolt/Parser.hpp Normal file
View file

@ -0,0 +1,45 @@
#pragma once
#include "bolt/CST.hpp"
namespace bolt {
class Scanner;
class Parser {
Stream<Token*>& Tokens;
Token* peekFirstTokenAfterModifiers();
public:
Parser(Stream<Token*>& S);
QualifiedName* parseQualifiedName();
TypeExpression* parseTypeExpression();
Pattern* parsePattern();
Param* parseParam();
ReferenceExpression* parseReferenceExpression();
Expression* parseExpression();
ExpressionStatement* parseExpressionStatement();
LetBodyElement* parseLetBodyElement();
LetDeclaration* parseLetDeclaration();
SourceElement* parseSourceElement();
SourceFile* parseSourceFile();
};
}

140
include/bolt/Scanner.hpp Normal file
View file

@ -0,0 +1,140 @@
#pragma once
#include <cstdint>
#include <string>
#include <deque>
#include <stack>
#include "bolt/Text.hpp"
#include "bolt/String.hpp"
namespace bolt {
class Token;

// Abstract interface of a stream of values with look-ahead.
template<typename T>
class Stream {
public:

  // Return the next value and advance past it.
  virtual T get() = 0;

  // Return the value Offset positions ahead without advancing.
  virtual T peek(std::size_t Offset = 0) = 0;

  virtual ~Stream() = default;

};
// Stream over an indexable container that is owned by someone else.
//
// Once the container is exhausted, get() and peek() return Sentry instead of
// reading out of bounds.
template<typename ContainerT>
class VectorStream : public Stream<typename ContainerT::value_type> {
public:

  using value_type = typename ContainerT::value_type;

  // The container being read; held by reference.
  ContainerT& Data;

  // Value returned for every position past the end of Data.
  value_type Sentry;

  // Index of the next element that get() will return.
  std::size_t Offset;

  VectorStream(ContainerT& Data, value_type Sentry, std::size_t Offset = 0):
    Data(Data), Sentry(Sentry), Offset(Offset) {}

  value_type get() override {
    return Offset < Data.size() ? Data[Offset++] : Sentry;
  }

  // The default argument mirrors the one declared on Stream::peek, so that
  // peek() can also be called without arguments on a VectorStream directly
  // rather than only through a Stream reference.
  value_type peek(std::size_t Offset2 = 0) override {
    auto I = Offset + Offset2;
    return I < Data.size() ? Data[I] : Sentry;
  }

};
// Stream that adds look-ahead on top of a single-value producer.
//
// Subclasses implement read(); values that were produced for peek() are kept
// in a queue until get() hands them out.
template<typename T>
class BufferedStream : public Stream<T> {

  // Values already produced by read() but not yet returned by get().
  std::deque<T> Buffer;

protected:

  // Produce the next value of the stream.
  virtual T read() = 0;

public:

  using value_type = T;

  value_type get() override {
    if (!Buffer.empty()) {
      auto Front = Buffer.front();
      Buffer.pop_front();
      return Front;
    }
    return read();
  }

  value_type peek(std::size_t Offset = 0) override {
    // Keep reading until the requested position is present in the queue.
    while (Offset >= Buffer.size()) {
      Buffer.push_back(read());
    }
    return Buffer[Offset];
  }

};
// Token stream that turns a stream of characters into tokens.
class Scanner : public BufferedStream<Token*> {

  // Source of characters; not owned by the scanner.
  Stream<Char>& Chars;

  // Location of the next character that getChar() will return.
  TextLoc CurrLoc;

  inline TextLoc getCurrentLoc() const {
    return CurrLoc;
  }

  // Consume one character and update the current location accordingly.
  inline Char getChar() {
    const auto Chr = Chars.get();
    if (Chr != '\n') {
      CurrLoc.Column += 1;
    } else {
      CurrLoc.Line += 1;
      CurrLoc.Column = 1;
    }
    return Chr;
  }

  // Look ahead in the character stream without consuming anything.
  inline Char peekChar(std::size_t Offset = 0) {
    return Chars.peek(Offset);
  }

protected:

  Token* read() override;

public:

  Scanner(Stream<Char>& Chars);

};
// Kind of frame kept on the Punctuator's frame stack.
enum class FrameType {
  Block,
  LineFold
};
// Token stream that wraps another token stream and inserts BlockStart,
// BlockEnd and LineFoldEnd tokens into it.
class Punctuator : public BufferedStream<Token*> {

  // Underlying token stream; not owned by the punctuator.
  Stream<Token*>& Tokens;

  // Currently open frames, innermost on top.
  std::stack<FrameType> Frames;

  // Reference locations that token positions are compared against.
  std::stack<TextLoc> Locations;

protected:

  Token* read() override;

public:

  Punctuator(Stream<Token*>& Tokens);

};
}

13
include/bolt/String.hpp Normal file
View file

@ -0,0 +1,13 @@
#pragma once
#include <string>
namespace bolt {

  // A single character of source text.
  using Char = char32_t;

  // A string of Char values.
  using String = std::u32string;

}

37
include/bolt/Text.hpp Normal file
View file

@ -0,0 +1,37 @@
#ifndef BOLT_TEXT_HPP
#define BOLT_TEXT_HPP
#include <stddef.h>
#include <string>
namespace bolt {
// A position in a text, expressed as a line and a column; both default to 1.
class TextLoc {
public:

  size_t Line = 1;
  size_t Column = 1;

  // Move this location forward over Text: a newline moves to the first
  // column of the next line, any other character moves one column right.
  void advance(const std::string& Text) {
    for (const char Chr : Text) {
      if (Chr != '\n') {
        ++Column;
        continue;
      }
      ++Line;
      Column = 1;
    }
  }

};
// A span of text delimited by a start and an end location.
class TextRange {
public:

  TextLoc Start;
  TextLoc End;

};
}
#endif // of #ifndef BOLT_TEXT_HPP

317
src/CST.cc Normal file
View file

@ -0,0 +1,317 @@
#include "bolt/CST.hpp"
namespace bolt {
// Destructors of the node base class and of all token classes. None of
// these classes holds child nodes, so there is nothing to release.

Node::~Node() = default;

Token::~Token() = default;

Equals::~Equals() = default;

Colon::~Colon() = default;

Dot::~Dot() = default;

DotDot::~DotDot() = default;

LParen::~LParen() = default;

RParen::~RParen() = default;

LBracket::~LBracket() = default;

RBracket::~RBracket() = default;

LBrace::~LBrace() = default;

RBrace::~RBrace() = default;

LetKeyword::~LetKeyword() = default;

MutKeyword::~MutKeyword() = default;

PubKeyword::~PubKeyword() = default;

TypeKeyword::~TypeKeyword() = default;

ReturnKeyword::~ReturnKeyword() = default;

ModKeyword::~ModKeyword() = default;

StructKeyword::~StructKeyword() = default;

Invalid::~Invalid() = default;

EndOfFile::~EndOfFile() = default;

BlockStart::~BlockStart() = default;

BlockEnd::~BlockEnd() = default;

LineFoldEnd::~LineFoldEnd() = default;

CustomOperator::~CustomOperator() = default;

Identifier::~Identifier() = default;

StringLiteral::~StringLiteral() = default;

IntegerLiteral::~IntegerLiteral() = default;
// Release every identifier of the module path, then the final name.
QualifiedName::~QualifiedName() {
  for (Identifier* Segment : ModulePath) {
    Segment->unref();
  }
  Name->unref();
}
// Destructors of the type-expression, pattern, expression and statement
// nodes. Each one releases the child nodes stored in its members, in
// declaration order; the abstract bases hold no children.

SourceElement::~SourceElement() = default;

LetBodyElement::~LetBodyElement() = default;

TypeExpression::~TypeExpression() = default;

ReferenceTypeExpression::~ReferenceTypeExpression() {
  Name->unref();
}

Pattern::~Pattern() = default;

BindPattern::~BindPattern() {
  Name->unref();
}

Expression::~Expression() = default;

ReferenceExpression::~ReferenceExpression() {
  Name->unref();
}

ConstantExpression::~ConstantExpression() {
  Token->unref();
}

CallExpression::~CallExpression() {
  Function->unref();
  for (class Expression* Arg : Args) {
    Arg->unref();
  }
}

Statement::~Statement() = default;

ExpressionStatement::~ExpressionStatement() {
  Expression->unref();
}

ReturnStatement::~ReturnStatement() {
  ReturnKeyword->unref();
  Expression->unref();
}

TypeAssert::~TypeAssert() {
  Colon->unref();
  TypeExpression->unref();
}
// Release the pattern and, when present, the type assertion.
//
// The parser constructs parameters with a null type assertion, so the
// pointer must be checked before it is dereferenced.
Param::~Param() {
  Pattern->unref();
  if (TypeAssert) {
    TypeAssert->unref();
  }
}
// Destructors of the declaration nodes. Each one releases the child nodes
// stored in its members, in declaration order.

LetBody::~LetBody() = default;

LetBlockBody::~LetBlockBody() {
  BlockStart->unref();
  for (LetBodyElement* Child : Elements) {
    Child->unref();
  }
}

LetExprBody::~LetExprBody() {
  Equals->unref();
  Expression->unref();
}

// The 'pub' and 'mut' keywords, the type assertion and the body are
// optional and therefore checked for null before being released.
LetDeclaration::~LetDeclaration() {
  if (PubKeyword != nullptr) {
    PubKeyword->unref();
  }
  LetKeywod->unref();
  if (MutKeyword != nullptr) {
    MutKeyword->unref();
  }
  Pattern->unref();
  for (Param* Parameter : Params) {
    Parameter->unref();
  }
  if (TypeAssert != nullptr) {
    TypeAssert->unref();
  }
  if (Body != nullptr) {
    Body->unref();
  }
}

StructDeclField::~StructDeclField() {
  Name->unref();
  Colon->unref();
  TypeExpression->unref();
}

StructDecl::~StructDecl() {
  StructKeyword->unref();
  Name->unref();
  Dot->unref();
  for (StructDeclField* Field : Fields) {
    Field->unref();
  }
}

SourceFile::~SourceFile() {
  for (SourceElement* Child : Elements) {
    Child->unref();
  }
}
// getText() implementations: the text each kind of token stands for.

// Punctuation

std::string Equals::getText() const {
  return "=";
}

std::string Colon::getText() const {
  return ":";
}

std::string Dot::getText() const {
  return ".";
}

std::string LParen::getText() const {
  return "(";
}

std::string RParen::getText() const {
  return ")";
}

std::string LBracket::getText() const {
  return "[";
}

std::string RBracket::getText() const {
  return "]";
}

std::string LBrace::getText() const {
  return "{";
}

std::string RBrace::getText() const {
  return "}";
}

// Keywords

std::string LetKeyword::getText() const {
  return "let";
}

std::string MutKeyword::getText() const {
  return "mut";
}

std::string PubKeyword::getText() const {
  return "pub";
}

std::string TypeKeyword::getText() const {
  return "type";
}

std::string ReturnKeyword::getText() const {
  return "return";
}

std::string ModKeyword::getText() const {
  return "mod";
}

std::string StructKeyword::getText() const {
  return "struct";
}

// Tokens without a spelling of their own report an empty text, except for
// BlockStart, which reports ".".

std::string Invalid::getText() const {
  return "";
}

std::string EndOfFile::getText() const {
  return "";
}

std::string BlockStart::getText() const {
  return ".";
}

std::string BlockEnd::getText() const {
  return "";
}

std::string LineFoldEnd::getText() const {
  return "";
}

// Tokens that carry a value

std::string CustomOperator::getText() const {
  return Text;
}

std::string Identifier::getText() const {
  return Text;
}

// The stored contents wrapped in double quotes; the contents are emitted
// as they are stored.
std::string StringLiteral::getText() const {
  std::string Quoted = "\"";
  Quoted += Text;
  Quoted += "\"";
  return Quoted;
}

std::string IntegerLiteral::getText() const {
  return std::to_string(Value);
}

std::string DotDot::getText() const {
  return "..";
}
}

9
src/Diagnostics.cc Normal file
View file

@ -0,0 +1,9 @@
#include "bolt/Diagnostics.hpp"
namespace bolt {
// Every diagnostic carries the same generic message in its
// std::runtime_error base.
Diagnostic::Diagnostic()
  : std::runtime_error("a compiler error occurred without being caught")
{
}
}

225
src/Parser.cc Normal file
View file

@ -0,0 +1,225 @@
#include "bolt/CST.hpp"
#include "bolt/Scanner.hpp"
#include "bolt/Parser.hpp"
#include "bolt/Diagnostics.hpp"
namespace bolt {
// Create a parser that reads its tokens from S; the stream is only
// referenced, not copied.
Parser::Parser(Stream<Token*>& S)
  : Tokens(S)
{
}
// Return the first upcoming token that is neither 'pub' nor 'mut'. Nothing
// is consumed from the stream.
Token* Parser::peekFirstTokenAfterModifiers() {
  for (std::size_t Offset = 0;; ++Offset) {
    Token* Candidate = Tokens.peek(Offset);
    const bool IsModifier =
      Candidate->Type == NodeType::PubKeyword
      || Candidate->Type == NodeType::MutKeyword;
    if (!IsModifier) {
      return Candidate;
    }
  }
}
// Consume the next token and throw an UnexpectedTokenDiagnostic unless it
// has node type NodeType::<name>.
//
// The temporary avoids a double-underscore name (such identifiers are
// reserved for the implementation), and the body is wrapped in
// do { ... } while (false) so that the macro followed by a semicolon forms
// exactly one statement.
#define BOLT_EXPECT_TOKEN(name) \
  do { \
    auto BoltExpectedToken_ = Tokens.get(); \
    if (BoltExpectedToken_->Type != NodeType::name) { \
      throw UnexpectedTokenDiagnostic(BoltExpectedToken_, std::vector<NodeType> { NodeType::name }); \
    } \
  } while (false)
// Parse a pattern. Only an identifier is accepted; it is consumed and
// wrapped in a BindPattern. Any other token is reported without being
// consumed.
Pattern* Parser::parsePattern() {
  auto Head = Tokens.peek();
  if (Head->Type != NodeType::Identifier) {
    throw UnexpectedTokenDiagnostic(Head, std::vector { NodeType::Identifier });
  }
  Tokens.get();
  return new BindPattern(static_cast<Identifier*>(Head));
}
// Parse a qualified name: one identifier, optionally followed by any number
// of '.'-separated identifiers. All identifiers but the last end up in the
// module path; the last one is the name itself.
//
// Throws UnexpectedTokenDiagnostic when an identifier is expected but some
// other token is found.
QualifiedName* Parser::parseQualifiedName() {
  std::vector<Identifier*> ModulePath;
  auto Name = Tokens.get();
  if (Name->Type != NodeType::Identifier) {
    throw UnexpectedTokenDiagnostic(Name, std::vector { NodeType::Identifier });
  }
  for (;;) {
    auto T1 = Tokens.peek();
    // The name only continues while the next token is a dot; anything else
    // belongs to whatever follows the name and must be left in the stream.
    if (T1->Type != NodeType::Dot) {
      break;
    }
    // Consume the dot. The identifier seen so far turns out to be a module
    // component rather than the final name.
    Tokens.get();
    ModulePath.push_back(static_cast<Identifier*>(Name));
    Name = Tokens.get();
    if (Name->Type != NodeType::Identifier) {
      throw UnexpectedTokenDiagnostic(Name, std::vector { NodeType::Identifier });
    }
  }
  return new QualifiedName(ModulePath, static_cast<Identifier*>(Name));
}
// Parse a type expression. Only a reference to a qualified name is
// accepted, which must start with an identifier.
TypeExpression* Parser::parseTypeExpression() {
  auto Head = Tokens.peek();
  if (Head->Type == NodeType::Identifier) {
    return new ReferenceTypeExpression(parseQualifiedName());
  }
  throw UnexpectedTokenDiagnostic(Head, std::vector { NodeType::Identifier });
}
// Parse an expression: an identifier becomes a ReferenceExpression, an
// integer or string literal becomes a ConstantExpression.
//
// Throws UnexpectedTokenDiagnostic for any other token; the offending token
// is left in the stream.
Expression* Parser::parseExpression() {
  auto T0 = Tokens.peek();
  switch (T0->Type) {
    case NodeType::Identifier:
      Tokens.get();
      return new ReferenceExpression(static_cast<Identifier*>(T0));
    case NodeType::IntegerLiteral:
    case NodeType::StringLiteral:
      Tokens.get();
      return new ConstantExpression(T0);
    default:
      // List every token kind accepted above, string literals included, so
      // that the resulting message matches what the parser really accepts.
      throw UnexpectedTokenDiagnostic(
        T0,
        std::vector { NodeType::Identifier, NodeType::IntegerLiteral, NodeType::StringLiteral }
      );
  }
}
// Parse an expression followed by the end of the current line-fold.
ExpressionStatement* Parser::parseExpressionStatement() {
  Expression* Inner = parseExpression();
  BOLT_EXPECT_TOKEN(LineFoldEnd);
  return new ExpressionStatement(Inner);
}
// Parse a let-declaration:
//
//   ['pub'] 'let' ['mut'] pattern param* [':' type] [body] <line-fold end>
//
// where the body is either an indented block of elements or '=' followed by
// an expression.
//
// Throws UnexpectedTokenDiagnostic when the token stream does not match.
LetDeclaration* Parser::parseLetDeclaration() {

  // The modifiers are optional. They must start out as null so that a
  // declaration without them does not store an indeterminate pointer, which
  // the destructor of LetDeclaration would later test and dereference.
  PubKeyword* Pub = nullptr;
  LetKeyword* Let = nullptr;
  MutKeyword* Mut = nullptr;

  auto T0 = Tokens.get();
  if (T0->Type == NodeType::PubKeyword) {
    Pub = static_cast<PubKeyword*>(T0);
    T0 = Tokens.get();
  }
  if (T0->Type != NodeType::LetKeyword) {
    throw UnexpectedTokenDiagnostic(T0, std::vector { NodeType::LetKeyword });
  }
  Let = static_cast<LetKeyword*>(T0);

  auto T1 = Tokens.peek();
  if (T1->Type == NodeType::MutKeyword) {
    Mut = static_cast<MutKeyword*>(T1);
    Tokens.get();
  }

  auto Patt = parsePattern();

  // Everything up to the type assertion or the body is a parameter.
  std::vector<Param*> Params;
  Token* T2;
  for (;;) {
    T2 = Tokens.peek();
    const bool EndOfParams =
      T2->Type == NodeType::LineFoldEnd
      || T2->Type == NodeType::BlockStart
      || T2->Type == NodeType::Equals
      || T2->Type == NodeType::Colon;
    if (EndOfParams) {
      break;
    }
    Params.push_back(new Param(parsePattern(), nullptr));
  }

  TypeAssert* TA = nullptr;
  if (T2->Type == NodeType::Colon) {
    Tokens.get();
    auto TE = parseTypeExpression();
    TA = new TypeAssert(static_cast<Colon*>(T2), TE);
    T2 = Tokens.peek();
  }

  LetBody* Body = nullptr;
  switch (T2->Type) {
    case NodeType::BlockStart:
    {
      Tokens.get();
      std::vector<LetBodyElement*> Elements;
      for (;;) {
        auto T3 = Tokens.peek();
        if (T3->Type == NodeType::BlockEnd) {
          break;
        }
        Elements.push_back(parseLetBodyElement());
      }
      // Consume the token that ends the block.
      Tokens.get();
      Body = new LetBlockBody(static_cast<BlockStart*>(T2), Elements);
      break;
    }
    case NodeType::Equals:
      Tokens.get();
      Body = new LetExprBody(static_cast<Equals*>(T2), parseExpression());
      break;
    case NodeType::LineFoldEnd:
      // A declaration without a body.
      Body = nullptr;
      break;
    default:
    {
      std::vector<NodeType> Expected { NodeType::BlockStart, NodeType::LineFoldEnd, NodeType::Equals };
      if (TA == nullptr) {
        // First tokens of TypeAssert
        Expected.push_back(NodeType::Colon);
        // First tokens of Pattern
        Expected.push_back(NodeType::Identifier);
      }
      throw UnexpectedTokenDiagnostic(T2, Expected);
    }
  }

  BOLT_EXPECT_TOKEN(LineFoldEnd);

  return new LetDeclaration(
    Pub,
    Let,
    Mut,
    Patt,
    Params,
    TA,
    Body
  );
}
// Parse one element of a let-declaration's block body: a nested
// let-declaration when the first token after any modifiers is 'let', an
// expression statement otherwise.
LetBodyElement* Parser::parseLetBodyElement() {
  if (peekFirstTokenAfterModifiers()->Type == NodeType::LetKeyword) {
    return parseLetDeclaration();
  }
  return parseExpressionStatement();
}
// Parse one top-level element: a let-declaration when the first token after
// any modifiers is 'let', an expression statement otherwise.
SourceElement* Parser::parseSourceElement() {
  if (peekFirstTokenAfterModifiers()->Type == NodeType::LetKeyword) {
    return parseLetDeclaration();
  }
  return parseExpressionStatement();
}
// Parse top-level elements until the end-of-file token is next, and collect
// them in a SourceFile node. The end-of-file token itself is not consumed.
SourceFile* Parser::parseSourceFile() {
  std::vector<SourceElement*> Elements;
  while (Tokens.peek()->Type != NodeType::EndOfFile) {
    Elements.push_back(parseSourceElement());
  }
  return new SourceFile(Elements);
}
}

326
src/Scanner.cc Normal file
View file

@ -0,0 +1,326 @@
#include <unordered_map>
#include "zen/config.hpp"
#include "bolt/Integer.hpp"
#include "bolt/CST.hpp"
#include "bolt/Diagnostics.hpp"
#include "bolt/Scanner.hpp"
namespace bolt {
// Whether Chr is a space, a line feed, a carriage return or a tab.
static inline bool isWhiteSpace(Char Chr) {
  return Chr == ' '
      || Chr == '\n'
      || Chr == '\r'
      || Chr == '\t';
}
// Whether Chr may appear inside an identifier: an ASCII letter, an ASCII
// digit or an underscore.
//
// The lowercase range starts at 'a' (97); starting it at 96 would also
// accept the backtick character.
static bool isIdentifierPart(Char Chr) {
  return (Chr >= 'A' && Chr <= 'Z') // Uppercase letter
      || (Chr >= 'a' && Chr <= 'z') // Lowercase letter
      || (Chr >= '0' && Chr <= '9') // Digit
      || Chr == '_';
}
// Numeric value of the decimal digit character Chr, which is asserted to
// lie in the range '0'..'9'.
static int toDigit(Char Chr) {
  ZEN_ASSERT(Chr >= '0' && Chr <= '9');
  return Chr - '0';
}
// Reserved words, mapped to the node type of the token they scan to.
std::unordered_map<ByteString, NodeType> Keywords {
  { "pub",    NodeType::PubKeyword },
  { "let",    NodeType::LetKeyword },
  { "mut",    NodeType::MutKeyword },
  { "return", NodeType::ReturnKeyword },
  { "type",   NodeType::TypeKeyword },
  { "mod",    NodeType::ModKeyword },
};
// Create a scanner that reads its characters from Chars; the stream is only
// referenced, not copied.
Scanner::Scanner(Stream<Char>& Chars)
  : Chars(Chars)
{
}
// Scan the next token from the character stream.
//
// Leading whitespace is skipped. The kind of token is decided by the first
// significant character; the returned token records the location of that
// character. A character that starts no known token yields an Invalid token.
//
// Throws UnexpectedStringDiagnostic for an unknown escape sequence, for a
// string literal that is still open at the end of the input, and for three
// consecutive dots.
Token* Scanner::read() {

  // Skip whitespace. Afterwards C0 holds the first significant character,
  // which has been peeked but not yet consumed.
  Char C0;
  for (;;) {
    C0 = peekChar();
    if (!isWhiteSpace(C0)) {
      break;
    }
    getChar();
  }

  auto StartLoc = getCurrentLoc();

  switch (C0) {

    case static_cast<Char>(EOF):
      return new EndOfFile(StartLoc);

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    {
      getChar();
      Integer I = toDigit(C0);
      // Accumulate the remaining decimal digits.
      for (;;) {
        auto C1 = peekChar();
        if (C1 < '0' || C1 > '9') {
          break;
        }
        getChar();
        I = I * 10 + toDigit(C1);
      }
      return new IntegerLiteral(I, StartLoc);
    }

    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case '_':
    {
      getChar();
      ByteString Text { static_cast<char>(C0) };
      for (;;) {
        auto C1 = peekChar();
        if (!isIdentifierPart(C1)) {
          break;
        }
        Text.push_back(static_cast<char>(C1));
        getChar();
      }

      // A word listed in Keywords scans to its keyword token instead of an
      // identifier. Every node type that can be stored in that table needs
      // a case here; a missing one ends up in ZEN_UNREACHABLE.
      auto Match = Keywords.find(Text);
      if (Match != Keywords.end()) {
        switch (Match->second) {
          case NodeType::PubKeyword:
            return new PubKeyword(StartLoc);
          case NodeType::LetKeyword:
            return new LetKeyword(StartLoc);
          case NodeType::MutKeyword:
            return new MutKeyword(StartLoc);
          case NodeType::TypeKeyword:
            return new TypeKeyword(StartLoc);
          case NodeType::ReturnKeyword:
            return new ReturnKeyword(StartLoc);
          case NodeType::ModKeyword:
            return new ModKeyword(StartLoc);
          case NodeType::StructKeyword:
            return new StructKeyword(StartLoc);
          default:
            ZEN_UNREACHABLE
        }
      }

      return new Identifier(Text, StartLoc);
    }

    case '"':
    {
      getChar();
      ByteString Text;
      bool Escaping = false;
      for (;;) {
        auto Loc = getCurrentLoc();
        auto C1 = getChar();

        // The character stream keeps returning its end-of-input value once
        // it is exhausted, so without this check an unterminated literal
        // would never leave the loop.
        if (C1 == static_cast<Char>(EOF)) {
          throw UnexpectedStringDiagnostic(Loc, String { C1 });
        }

        if (Escaping) {
          switch (C1) {
            case 'a': Text.push_back('\a'); break;
            case 'b': Text.push_back('\b'); break;
            case 'f': Text.push_back('\f'); break;
            case 'n': Text.push_back('\n'); break;
            case 'r': Text.push_back('\r'); break;
            case 't': Text.push_back('\t'); break;
            case 'v': Text.push_back('\v'); break;
            case '0': Text.push_back('\0'); break;
            case '\'': Text.push_back('\''); break;
            case '"': Text.push_back('"'); break;
            case '\\': Text.push_back('\\'); break;
            default:
              throw UnexpectedStringDiagnostic(Loc, String { C1 });
          }
          Escaping = false;
          continue;
        }

        if (C1 == '"') {
          break;
        }
        if (C1 == '\\') {
          Escaping = true;
          continue;
        }

        // NOTE: Text is a byte string, so the character is narrowed to a
        // single char here.
        Text.push_back(static_cast<char>(C1));
      }
      return new StringLiteral(Text, StartLoc);
    }

    case '.':
    {
      getChar();
      auto C1 = peekChar();
      if (C1 == '.') {
        getChar();
        auto C2 = peekChar();
        if (C2 == '.') {
          throw UnexpectedStringDiagnostic(getCurrentLoc(), String { C2 });
        }
        return new DotDot(StartLoc);
      }
      return new Dot(StartLoc);
    }

    // Single-character tokens

    case ':':
      getChar();
      return new Colon(StartLoc);

    case '(':
      getChar();
      return new LParen(StartLoc);

    case ')':
      getChar();
      return new RParen(StartLoc);

    case '[':
      getChar();
      return new LBracket(StartLoc);

    case ']':
      getChar();
      return new RBracket(StartLoc);

    case '{':
      getChar();
      return new LBrace(StartLoc);

    case '}':
      getChar();
      return new RBrace(StartLoc);

    case '=':
      getChar();
      return new Equals(StartLoc);

    default:
      // TODO Add a diagnostic message indicating that scanning failed.
      // NOTE: the offending character is not consumed here.
      return new Invalid(StartLoc);

  }

}
// Create a punctuator on top of Tokens. The frame stack starts out with a
// single block frame whose reference location is line 0, column 0.
Punctuator::Punctuator(Stream<Token*>& Tokens)
  : Tokens(Tokens)
{
  Frames.push(FrameType::Block);
  Locations.push(TextLoc { 0, 0 });
}
// Produce the next token of the punctuated stream: either a token taken
// from the underlying stream, or a BlockStart, BlockEnd or LineFoldEnd token
// created here from the position of the upcoming token.
//
// The decision is driven by the innermost open frame (top of Frames) and by
// the reference location on top of Locations. Only line-fold frames push and
// pop a location of their own.
Token* Punctuator::read() {
auto T0 = Tokens.peek();
// At the end of the input, close the open frames one per call, innermost
// first. The end-of-file token itself is returned (without being consumed)
// once only the initial frame is left.
if (T0->Type == NodeType::EndOfFile) {
if (Frames.size() == 1) {
return T0;
}
auto Frame = Frames.top();
Frames.pop();
switch (Frame) {
case FrameType::Block:
return new BlockEnd(T0->getStartLoc());
case FrameType::LineFold:
return new LineFoldEnd(T0->getStartLoc());
}
}
auto RefLoc = Locations.top();
switch (Frames.top()) {
case FrameType::LineFold:
{
// A token on a later line that is not indented past the reference column
// ends the current line-fold. The token stays in the underlying stream.
if (T0->getStartLine() > RefLoc.Line
&& T0->getStartColumn() <= RefLoc.Column) {
Frames.pop();
Locations.pop();
return new LineFoldEnd(T0->getStartLoc());
}
// A dot whose successor starts on a later line than the dot ends on opens
// a block: the dot is consumed and replaced by a BlockStart token.
if (T0->Type == NodeType::Dot) {
auto T1 = Tokens.peek(1);
if (T1->getStartLine() > T0->getEndLine()) {
Tokens.get();
Frames.push(FrameType::Block);
return new BlockStart(T0->getStartLoc());
}
}
return Tokens.get();
}
case FrameType::Block:
{
// A token that is not indented past the reference column ends the block.
if (T0->getStartColumn() <= RefLoc.Column) {
Frames.pop();
return new BlockEnd(T0->getStartLoc());
}
// Any other token starts a new line-fold, with its own start as the
// reference location.
Frames.push(FrameType::LineFold);
Locations.push(T0->getStartLoc());
return Tokens.get();
}
}
}
}

129
src/main.cc Normal file
View file

@ -0,0 +1,129 @@
#include <stdio.h>
#include <iostream>
#include <fstream>
#include "zen/config.hpp"
#include "bolt/CST.hpp"
#include "bolt/Diagnostics.hpp"
#include "bolt/Scanner.hpp"
#include "bolt/Parser.hpp"
using namespace bolt;
// Read the whole file at Path and return its contents, one Char per byte.
//
// Throws std::runtime_error when the file cannot be opened.
String readFile(std::string Path) {

  std::ifstream File(Path);
  if (!File) {
    // Without this check the failed stream reports a position of -1 below,
    // which would be turned into an enormous reserve() request.
    throw std::runtime_error("could not open file '" + Path + "'");
  }

  String Out;

  // Pre-size the buffer when the stream is able to report its length.
  File.seekg(0, std::ios::end);
  const auto Size = File.tellg();
  if (Size > 0) {
    Out.reserve(static_cast<std::size_t>(Size));
  }
  File.seekg(0, std::ios::beg);

  // Go through unsigned char so that bytes of 0x80 and above become the
  // values 128..255 instead of being sign-extended. Where char is signed,
  // sign-extension would turn the byte 0xFF into the same value as the
  // end-of-input sentinel used by the character stream.
  for (std::istreambuf_iterator<char> Iter(File), End; Iter != End; ++Iter) {
    Out.push_back(static_cast<unsigned char>(*Iter));
  }

  return Out;
}
// Human-readable description of a token kind, for use in error messages.
//
// Token kinds with a fixed spelling are described by that spelling in single
// quotes. Node types without a description end up in ZEN_UNREACHABLE.
std::string describe(NodeType Type) {
  switch (Type) {
    case NodeType::Identifier:
      return "an identifier";
    case NodeType::CustomOperator:
      return "an operator";
    case NodeType::IntegerLiteral:
      return "an integer literal";
    case NodeType::StringLiteral:
      return "a string literal";
    case NodeType::EndOfFile:
      return "end-of-file";
    case NodeType::BlockStart:
      return "the start of a new indented block";
    case NodeType::BlockEnd:
      return "the end of the current indented block";
    case NodeType::LineFoldEnd:
      return "the end of the current line-fold";
    case NodeType::LParen:
      return "'('";
    case NodeType::RParen:
      return "')'";
    // Brackets are the square ones and braces the curly ones, matching the
    // characters the scanner maps to these token kinds.
    case NodeType::LBracket:
      return "'['";
    case NodeType::RBracket:
      return "']'";
    case NodeType::LBrace:
      return "'{'";
    case NodeType::RBrace:
      return "'}'";
    case NodeType::Colon:
      return "':'";
    case NodeType::Equals:
      return "'='";
    case NodeType::Dot:
      return "'.'";
    case NodeType::DotDot:
      return "'..'";
    case NodeType::PubKeyword:
      return "'pub'";
    case NodeType::LetKeyword:
      return "'let'";
    case NodeType::MutKeyword:
      return "'mut'";
    case NodeType::ReturnKeyword:
      return "'return'";
    case NodeType::TypeKeyword:
      return "'type'";
    case NodeType::ModKeyword:
      return "'mod'";
    case NodeType::StructKeyword:
      return "'struct'";
    default:
      ZEN_UNREACHABLE
  }
}
// Entry point: scan and parse the file named by the first command-line
// argument.
//
// In builds with NDEBUG defined an unexpected token is reported on standard
// error and the process exits with status 1. In other builds the exception
// is deliberately left uncaught.
int main(int argc, const char* argv[]) {

  if (argc < 2) {
    fprintf(stderr, "Not enough arguments provided.\n");
    return 1;
  }

  auto Text = readFile(argv[1]);

  // Characters -> tokens -> tokens with block/line-fold markers -> tree.
  VectorStream<String> Chars(Text, EOF);
  Scanner S(Chars);
  Punctuator PT(S);
  Parser P(PT);

  SourceFile* SF = nullptr;

#ifdef NDEBUG
  try {
    SF = P.parseSourceFile();
  } catch (const UnexpectedTokenDiagnostic& E) {
    std::cerr << "<unknown.bolt>:" << E.Actual->getStartLine() << ":" << E.Actual->getStartColumn() << ": expected ";
    switch (E.Expected.size()) {
      case 0:
        std::cerr << "nothing";
        break;
      case 1:
        std::cerr << describe(E.Expected[0]);
        break;
      default:
      {
        // Prints "A, B or C". Prev always holds the entry that has been
        // fetched but not yet printed, so it must be loaded before the loop
        // reads it for the first time.
        auto Iter = E.Expected.begin();
        std::cerr << describe(*Iter++);
        NodeType Prev = *Iter++;
        while (Iter != E.Expected.end()) {
          std::cerr << ", " << describe(Prev);
          Prev = *Iter++;
        }
        std::cerr << " or " << describe(Prev);
        break;
      }
    }
    std::cerr << " but instead got '" << E.Actual->getText() << "'\n";
    // A failed parse must not be reported as success.
    return 1;
  }
#else
  SF = P.parseSourceFile();
#endif

  return 0;
}