From cd1e20d4605161ed85512cd7ba4c1c88f3572915 Mon Sep 17 00:00:00 2001 From: Sam Vervaeck Date: Sun, 21 Aug 2022 16:25:52 +0200 Subject: [PATCH] Partially implement a type checker --- CMakeLists.txt | 1 + include/bolt/CST.hpp | 100 +++++++++---- include/bolt/Checker.hpp | 279 +++++++++++++++++++++++++++++++++++ include/bolt/Parser.hpp | 65 ++++++++- src/CST.cc | 24 ++- src/Checker.cc | 307 +++++++++++++++++++++++++++++++++++++++ src/Parser.cc | 114 ++++++++++++++- src/Scanner.cc | 74 ++++++++-- src/main.cc | 4 + 9 files changed, 912 insertions(+), 56 deletions(-) create mode 100644 include/bolt/Checker.hpp create mode 100644 src/Checker.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index ab195d38f..9ce4fa034 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,7 @@ add_executable( src/Diagnostics.cc src/Scanner.cc src/Parser.cc + src/Checker.cc src/main.cc ) target_compile_options( diff --git a/include/bolt/CST.hpp b/include/bolt/CST.hpp index 655bdafd4..ee4d71a6c 100644 --- a/include/bolt/CST.hpp +++ b/include/bolt/CST.hpp @@ -33,6 +33,7 @@ namespace bolt { BlockEnd, LineFoldEnd, CustomOperator, + Assignment, Identifier, StringLiteral, IntegerLiteral, @@ -42,6 +43,8 @@ namespace bolt { ReferenceExpression, ConstantExpression, CallExpression, + InfixExpression, + UnaryExpression, ExpressionStatement, ReturnStatement, TypeAssert, @@ -377,6 +380,19 @@ namespace bolt { }; + class Assignment : public Token { + public: + + ByteString Text; + + Assignment(ByteString Text, TextLoc StartLoc): Token(NodeType::Assignment, StartLoc), Text(Text) {} + + std::string getText() const override; + + ~Assignment(); + + }; + class Identifier : public Token { public: @@ -433,24 +449,6 @@ namespace bolt { }; - class SourceElement : public Node { - public: - - SourceElement(NodeType Type): Node(Type) {} - - ~SourceElement(); - - }; - - class LetBodyElement : public Node { - public: - - LetBodyElement(NodeType Type): Node(Type) {} - - ~LetBodyElement(); - - }; - class TypeExpression : public Node { public: @@ -546,19 +544,54 @@ namespace bolt { }; - class Statement : public LetBodyElement { + class InfixExpression : public Expression { public: - Statement(NodeType Type): LetBodyElement(Type) {} + Expression* LHS; + Token* Operator; + Expression* RHS; + + InfixExpression(Expression* LHS, Token* Operator, Expression* RHS): + Expression(NodeType::InfixExpression), + LHS(LHS), + Operator(Operator), + RHS(RHS) {} + + ~InfixExpression(); + + }; + + class UnaryExpression : public Expression { + public: + + Token* Operator; + Expression* Argument; + + UnaryExpression( + Token* Operator, + Expression* Argument + ): Expression(NodeType::UnaryExpression), + Operator(Operator), + Argument(Argument) {} + + ~UnaryExpression(); + + }; + + class Statement : public Node { + public: + + Statement(NodeType Type): Node(Type) {} ~Statement(); }; - class ExpressionStatement : public Statement, public SourceElement { + class ExpressionStatement : public Statement { public: - ExpressionStatement(Expression* Expression): Statement(NodeType::ExpressionStatement), SourceElement(NodeType::ExpressionStatement), Expression(Expression) {} + ExpressionStatement(Expression* Expression): + Statement(NodeType::ExpressionStatement), Expression(Expression) {} Expression* Expression; @@ -615,10 +648,15 @@ namespace bolt { class LetBlockBody : public LetBody { public: - LetBlockBody(BlockStart* BlockStart, std::vector Elements): LetBody(NodeType::LetBlockBody), BlockStart(BlockStart), Elements(Elements) {} + LetBlockBody( + BlockStart* BlockStart, + std::vector Elements + ): LetBody(NodeType::LetBlockBody), + BlockStart(BlockStart), + Elements(Elements) {} BlockStart* BlockStart; - std::vector Elements; + std::vector Elements; ~LetBlockBody(); @@ -641,7 +679,7 @@ namespace bolt { }; - class LetDeclaration : public SourceElement, public LetBodyElement { + class LetDeclaration : public Node { public: PubKeyword* PubKeyword; @@ -660,8 +698,7 @@ namespace bolt { std::vector Params, class TypeAssert* TypeAssert, LetBody* Body - ): SourceElement(NodeType::LetDeclaration), - LetBodyElement(NodeType::LetDeclaration), + ): Node(NodeType::LetDeclaration), PubKeyword(PubKeyword), LetKeywod(LetKeywod), MutKeyword(MutKeyword), @@ -694,7 +731,7 @@ namespace bolt { }; - class StructDecl : public SourceElement { + class StructDecl : public Node { public: StructDecl( @@ -702,7 +739,7 @@ namespace bolt { Identifier* Name, Dot* Dot, std::vector Fields - ): SourceElement(NodeType::StructDecl), + ): Node(NodeType::StructDecl), StructKeyword(StructKeyword), Name(Name), Dot(Dot), @@ -721,9 +758,10 @@ namespace bolt { public: - SourceFile(std::vector Elements): Node(NodeType::SourceFile), Elements(Elements) {} + SourceFile(std::vector Elements): + Node(NodeType::SourceFile), Elements(Elements) {} - std::vector Elements; + std::vector Elements; ~SourceFile(); diff --git a/include/bolt/Checker.hpp b/include/bolt/Checker.hpp new file mode 100644 index 000000000..db1161092 --- /dev/null +++ b/include/bolt/Checker.hpp @@ -0,0 +1,279 @@ + +#pragma once + +#include "zen/config.hpp" + +#include "bolt/ByteString.hpp" + +#include +#include +#include +#include + +namespace bolt { + + class Node; + class Expression; + class SourceFile; + + class Type; + class TVar; + + using TVSub = std::unordered_map; + using TVSet = std::unordered_set; + + enum class TypeKind : unsigned char { + Var, + Con, + Arrow, + Any, + }; + + class Type { + + const TypeKind Kind; + + protected: + + inline Type(TypeKind Kind): + Kind(Kind) {} + + public: + + bool hasTypeVar(const TVar* TV); + + Type* substitute(const TVSub& Sub); + + inline TypeKind getKind() const noexcept { + return Kind; + } + + }; + + class TCon : public Type { + public: + + const size_t Id; + std::vector Args; + + inline TCon(const size_t Id, std::vector Args ): + Type(TypeKind::Con), Id(Id), Args(Args) {} + + }; + + class TVar : public Type { + public: + + const size_t Id; + + inline TVar(size_t Id): + Type(TypeKind::Var), Id(Id) {} + + }; + + class TArrow : public Type { + public: + + std::vector ParamTypes; + Type* ReturnType; + + inline TArrow( + std::vector ParamTypes, + Type* ReturnType + ): Type(TypeKind::Arrow), + ParamTypes(ParamTypes), + ReturnType(ReturnType) {} + + }; + + class TAny : public Type { + public: + + inline TAny(): + Type(TypeKind::Any) {} + + }; + + // template + // struct DerefHash { + // std::size_t operator()(const T& Value) const noexcept { + // return std::hash{}(*Value); + // } + // }; + + class Constraint; + + class Forall { + public: + + TVSet TVs; + std::vector Constriants; + Type* Type; + + }; + + enum class SchemeKind : unsigned char { + Forall, + }; + + class Scheme { + + const SchemeKind Kind; + + union { + Forall F; + }; + + public: + + inline Scheme(Forall F): + Kind(SchemeKind::Forall), F(F) {} + + inline Scheme(const Scheme& Other): + Kind(Other.Kind) { + switch (Kind) { + case SchemeKind::Forall: + F = Other.F; + break; + } + } + + + inline Scheme(Scheme&& Other): + Kind(std::move(Other.Kind)) { + switch (Kind) { + case SchemeKind::Forall: + F = std::move(Other.F); + break; + } + } + + template + T& as(); + + template<> + Forall& as() { + ZEN_ASSERT(Kind == SchemeKind::Forall); + return F; + } + + inline SchemeKind getKind() const noexcept { + return Kind; + } + + ~Scheme() { + switch (Kind) { + case SchemeKind::Forall: + F.~Forall(); + break; + } + } + + }; + + class TypeEnv { + + std::unordered_map Mapping; + + public: + + void add(ByteString Name, Scheme S); + + Scheme* lookup(ByteString Name); + + Type* lookupMono(ByteString Name); + + }; + + enum class ConstraintKind { + Equal, + Many, + Empty, + }; + + class Constraint { + + const ConstraintKind Kind; + + public: + + inline Constraint(ConstraintKind Kind): + Kind(Kind) {} + + inline ConstraintKind getKind() const noexcept { + return Kind; + } + + virtual ~Constraint() {} + + }; + + using ConstraintSet = std::vector; + + class CEqual : public Constraint { + public: + + Type* Left; + Type* Right; + + inline CEqual(Type* Left, Type* Right): + Constraint(ConstraintKind::Equal), Left(Left), Right(Right) {} + + }; + + class CMany : public Constraint { + public: + + ConstraintSet Constraints; + + inline CMany(ConstraintSet Constraints): + Constraint(ConstraintKind::Many), Constraints(Constraints) {} + + }; + + class CEmpty : public Constraint { + public: + + inline CEmpty(): + Constraint(ConstraintKind::Empty) {} + + }; + + class InferContext { + + ConstraintSet& Constraints; + + public: + + TypeEnv& Env; + + inline InferContext(ConstraintSet& Constraints, TypeEnv& Env): + Constraints(Constraints), Env(Env) {} + + void addConstraint(Constraint* C); + + }; + + class Checker { + + size_t nextTypeVarId = 0; + + Type* inferExpression(Expression* Expression, InferContext& Env); + + void infer(Node* node, InferContext& Env); + + TVar* createTypeVar(); + + Type* instantiate(Scheme& S); + + bool unify(Type* A, Type* B, TVSub& Solution); + + void solve(Constraint* Constraint); + + public: + + void check(SourceFile* SF); + + }; + +} diff --git a/include/bolt/Parser.hpp b/include/bolt/Parser.hpp index 49a14b49b..7ac68eb51 100644 --- a/include/bolt/Parser.hpp +++ b/include/bolt/Parser.hpp @@ -1,18 +1,73 @@ #pragma once +#include +#include + #include "bolt/CST.hpp" namespace bolt { class Scanner; + enum OperatorFlags { + OperatorFlags_Prefix = 1, + OperatorFlags_Suffix = 2, + OperatorFlags_InfixL = 4, + OperatorFlags_InfixR = 8, + }; + + struct OperatorInfo { + + int Precedence; + unsigned Flags; + + inline bool isPrefix() const noexcept { + return Flags & OperatorFlags_Prefix; + } + + inline bool isSuffix() const noexcept { + return Flags & OperatorFlags_Suffix; + } + + inline bool isInfix() const noexcept { + return Flags & (OperatorFlags_InfixL | OperatorFlags_InfixR); + } + + inline bool isRightAssoc() const noexcept { + return Flags & OperatorFlags_InfixR; + } + + }; + + class OperatorTable { + + std::unordered_map Mapping; + + public: + + void add(std::string Name, unsigned Flags, int Precedence); + + std::optional getInfix(Token* T); + + bool isInfix(Token* T); + + bool isPrefix(Token* T); + + bool isSuffix(Token* T); + + }; + class Parser { Stream& Tokens; + OperatorTable ExprOperators; + Token* peekFirstTokenAfterModifiers(); + Expression* parseInfixOperatorAfterExpression(Expression* LHS, int MinPrecedence); + public: Parser(Stream& S); @@ -27,15 +82,21 @@ namespace bolt { ReferenceExpression* parseReferenceExpression(); + Expression* parseUnaryExpression(); + + Expression* parsePrimitiveExpression(); + Expression* parseExpression(); + Expression* parseCallExpression(); + ExpressionStatement* parseExpressionStatement(); - LetBodyElement* parseLetBodyElement(); + Node* parseLetBodyElement(); LetDeclaration* parseLetDeclaration(); - SourceElement* parseSourceElement(); + Node* parseSourceElement(); SourceFile* parseSourceFile(); diff --git a/src/CST.cc b/src/CST.cc index b2e05ce72..c780feb51 100644 --- a/src/CST.cc +++ b/src/CST.cc @@ -77,6 +77,9 @@ namespace bolt { CustomOperator::~CustomOperator() { } + Assignment::~Assignment() { + } + Identifier::~Identifier() { } @@ -93,12 +96,6 @@ namespace bolt { Name->unref(); } - SourceElement::~SourceElement() { - } - - LetBodyElement::~LetBodyElement() { - } - TypeExpression::~TypeExpression() { } @@ -131,6 +128,17 @@ namespace bolt { } } + InfixExpression::~InfixExpression() { + LHS->unref(); + Operator->unref(); + RHS->unref(); + } + + UnaryExpression::~UnaryExpression() { + Operator->unref(); + Argument->unref(); + } + Statement::~Statement() { } @@ -296,6 +304,10 @@ namespace bolt { std::string CustomOperator::getText() const { return Text; } + + std::string Assignment::getText() const { + return Text + "="; + } std::string Identifier::getText() const { return Text; diff --git a/src/Checker.cc b/src/Checker.cc new file mode 100644 index 000000000..8a254f750 --- /dev/null +++ b/src/Checker.cc @@ -0,0 +1,307 @@ + +#include + +#include "zen/config.hpp" + +#include "bolt/CST.hpp" +#include "bolt/Checker.hpp" + +namespace bolt { + + Scheme* TypeEnv::lookup(ByteString Name) { + auto Match = Mapping.find(Name); + if (Match == Mapping.end()) { + return {}; + } + return &Match->second; + } + + Type* TypeEnv::lookupMono(ByteString Name) { + auto Match = Mapping.find(Name); + if (Match == Mapping.end()) { + return nullptr; + } + auto& F = Match->second.as(); + ZEN_ASSERT(F.TVs.empty()); + return F.Type; + } + + bool Type::hasTypeVar(const TVar* TV) { + switch (Kind) { + case TypeKind::Var: + return static_cast(this)->Id == TV->Id; + case TypeKind::Arrow: + { + auto Y = static_cast(this); + for (auto Ty: Y->ParamTypes) { + if (Ty->hasTypeVar(TV)) { + return true; + } + } + return Y->ReturnType->hasTypeVar(TV); + } + } + } + + Type* Type::substitute(const TVSub &Sub) { + switch (Kind) { + case TypeKind::Var: + { + auto Y = static_cast(this); + auto Match = Sub.find(Y); + return Match != Sub.end() ? Match->second : Y; + } + case TypeKind::Arrow: + { + auto Y = static_cast(this); + std::vector NewParamTypes; + for (auto Ty: Y->ParamTypes) { + NewParamTypes.push_back(Ty->substitute(Sub)); + } + auto NewRetTy = Y->ReturnType->substitute(Sub) ; + return new TArrow(NewParamTypes, NewRetTy); + } + case TypeKind::Any: + return this; + case TypeKind::Con: + { + auto Y = static_cast(this); + std::vector NewArgs; + for (auto Arg: Y->Args) { + NewArgs.push_back(Arg->substitute(Sub)); + } + return new TCon(Y->Id, Y->Args); + } + } + } + + void InferContext::addConstraint(Constraint *C) { + Constraints.push_back(C); + } + + void Checker::infer(Node* X, InferContext& Ctx) { + + switch (X->Type) { + + case NodeType::SourceFile: + { + auto Y = static_cast(X); + for (auto Element: Y->Elements) { + infer(Element, Ctx); + } + break; + } + + case NodeType::LetDeclaration: + { + // TODO + break; + } + + + case NodeType::ExpressionStatement: + { + auto Y = static_cast(X); + inferExpression(Y->Expression, Ctx); + break; + } + + default: + ZEN_UNREACHABLE + + } + + } + + TVar* Checker::createTypeVar() { + return new TVar(nextTypeVarId++); + } + + Type* Checker::instantiate(Scheme& S) { + + switch (S.getKind()) { + + case SchemeKind::Forall: + { + auto& F = S.as(); + TVSub Sub; + for (auto TV: F.TVs) { + Sub[TV] = createTypeVar(); + } + return F.Type->substitute(Sub); + } + + } + + } + + + Type* Checker::inferExpression(Expression* X, InferContext& Ctx) { + + switch (X->Type) { + + case NodeType::ConstantExpression: + { + auto Y = static_cast(X); + switch (Y->Token->Type) { + case NodeType::IntegerLiteral: + return Ctx.Env.lookupMono("Int"); + case NodeType::StringLiteral: + return Ctx.Env.lookupMono("String"); + default: + ZEN_UNREACHABLE + } + } + + case NodeType::ReferenceExpression: + { + auto Y = static_cast(X); + auto Scm = Ctx.Env.lookup(Y->Name->Text); + if (Scm == nullptr) { + // TODO add diagnostic + return new TAny(); + } + return instantiate(*Scm); + } + + case NodeType::InfixExpression: + { + auto Y = static_cast(X); + auto Scm = Ctx.Env.lookup(Y->Operator->getText()); + if (Scm == nullptr) { + // TODO add diagnostic + return new TAny(); + } + auto OpTy = instantiate(*Scm); + auto RetTy = createTypeVar(); + std::vector ArgTys; + ArgTys.push_back(inferExpression(Y->LHS, Ctx)); + ArgTys.push_back(inferExpression(Y->RHS, Ctx)); + Ctx.addConstraint(new CEqual { new TArrow(ArgTys, RetTy), OpTy }); + return RetTy; + } + + default: + ZEN_UNREACHABLE + + } + + } + + void Checker::check(SourceFile *SF) { + TypeEnv Global; + ConstraintSet Constraints; + InferContext Toplevel { Constraints, Global }; + infer(SF, Toplevel); + solve(new CMany(Constraints)); + } + + void Checker::solve(Constraint* Constraint) { + + std::stack Queue; + TVSub Sub; + + while (!Queue.empty()) { + + auto Constraint = Queue.top(); + + Queue.pop(); + + switch (Constraint->getKind()) { + + case ConstraintKind::Empty: + break; + + case ConstraintKind::Many: + { + auto Y = static_cast(Constraint); + for (auto Constraint: Y->Constraints) { + Queue.push(Constraint); + } + break; + } + + case ConstraintKind::Equal: + { + auto Y = static_cast(Constraint); + if (!unify(Y->Left, Y->Right, Sub)) { + // TODO diagnostic + fprintf(stderr, "unification error\n"); + } + break; + } + + } + + } + + } + + bool Checker::unify(Type* A, Type* B, TVSub& Solution) { + + if (A->getKind() == TypeKind::Var) { + auto Match = Solution.find(static_cast(A)); + if (Match != Solution.end()) { + A = Match->second; + } + } + + if (B->getKind() == TypeKind::Var) { + auto Match = Solution.find(static_cast(B)); + if (Match != Solution.end()) { + B = Match->second; + } + } + + if (A->getKind() == TypeKind::Var) { + auto Y = static_cast(A); + if (B->hasTypeVar(Y)) { + // TODO occurs check + } + Solution[Y] = B; + return true; + } + + if (B->getKind() == TypeKind::Var) { + return unify(B, A, Solution); + } + + if (A->getKind() == TypeKind::Arrow && B->getKind() == TypeKind::Arrow) { + auto Y = static_cast(A); + auto Z = static_cast(B); + if (Y->ParamTypes.size() != Z->ParamTypes.size()) { + // TODO diagnostic + return false; + } + auto Count = Y->ParamTypes.size(); + for (std::size_t I = 0; I < Count; I++) { + if (!unify(Y->ParamTypes[I], Z->ParamTypes[I], Solution)) { + return false; + } + } + return unify(Y->ReturnType, Z->ReturnType, Solution); + } + + if (A->getKind() == TypeKind::Con && B->getKind() == TypeKind::Arrow) { + auto Y = static_cast(A); + auto Z = static_cast(B); + if (Y->Id != Z->Id) { + // TODO diagnostic + return false; + } + ZEN_ASSERT(Y->Args.size() == Z->Args.size()); + auto Count = Y->Args.size(); + for (std::size_t I = 0; I < Count; I++) { + if (!unify(Y->Args[I], Z->Args[I], Solution)) { + return false; + } + } + return true; + } + + // TODO diagnostic + return false; + } + +} + diff --git a/src/Parser.cc b/src/Parser.cc index b8ab78687..baf426aeb 100644 --- a/src/Parser.cc +++ b/src/Parser.cc @@ -3,11 +3,54 @@ #include "bolt/Scanner.hpp" #include "bolt/Parser.hpp" #include "bolt/Diagnostics.hpp" +#include namespace bolt { + std::optional OperatorTable::getInfix(Token* T) { + auto Match = Mapping.find(T->getText()); + if (Match == Mapping.end() || !Match->second.isInfix()) { + return {}; + } + return Match->second; + } + + bool OperatorTable::isInfix(Token* T) { + auto Match = Mapping.find(T->getText()); + return Match != Mapping.end() && Match->second.isInfix(); + } + + bool OperatorTable::isPrefix(Token* T) { + auto Match = Mapping.find(T->getText()); + return Match != Mapping.end() && Match->second.isPrefix(); + } + + bool OperatorTable::isSuffix(Token* T) { + auto Match = Mapping.find(T->getText()); + return Match != Mapping.end() && Match->second.isSuffix(); + } + + void OperatorTable::add(std::string Name, unsigned Flags, int Precedence) { + Mapping.emplace(Name, OperatorInfo { Precedence, Flags }); + } + Parser::Parser(Stream& S): - Tokens(S) {} + Tokens(S) { + ExprOperators.add("**", OperatorFlags_InfixR, 10); + ExprOperators.add("*", OperatorFlags_InfixL, 5); + ExprOperators.add("/", OperatorFlags_InfixL, 5); + ExprOperators.add("+", OperatorFlags_InfixL, 4); + ExprOperators.add("-", OperatorFlags_InfixL, 4); + ExprOperators.add("<", OperatorFlags_InfixL, 3); + ExprOperators.add(">", OperatorFlags_InfixL, 3); + ExprOperators.add("<=", OperatorFlags_InfixL, 3); + ExprOperators.add(">=", OperatorFlags_InfixL, 3); + ExprOperators.add("==", OperatorFlags_InfixL, 3); + ExprOperators.add("!=", OperatorFlags_InfixL, 3); + ExprOperators.add(":", OperatorFlags_InfixL, 2); + ExprOperators.add("<|>", OperatorFlags_InfixL, 1); + ExprOperators.add("$", OperatorFlags_InfixR, 0); + } Token* Parser::peekFirstTokenAfterModifiers() { std::size_t I = 0; @@ -73,7 +116,7 @@ namespace bolt { } } - Expression* Parser::parseExpression() { + Expression* Parser::parsePrimitiveExpression() { auto T0 = Tokens.peek(); switch (T0->Type) { case NodeType::Identifier: @@ -88,6 +131,65 @@ namespace bolt { } } + Expression* Parser::parseCallExpression() { + auto Operator = parsePrimitiveExpression(); + std::vector Args; + for (;;) { + auto T1 = Tokens.peek(); + if (T1->Type == NodeType::LineFoldEnd || ExprOperators.isInfix(T1)) { + break; + } + Args.push_back(parsePrimitiveExpression()); + } + if (Args.empty()) { + return Operator; + } + return new CallExpression(Operator, Args); + } + + Expression* Parser::parseUnaryExpression() { + std::vector Prefix; + for (;;) { + auto T0 = Tokens.peek(); + if (!ExprOperators.isPrefix(T0)) { + break; + } + Tokens.get(); + Prefix.push_back(T0); + } + auto E = parseCallExpression(); + for (auto Iter = Prefix.rbegin(); Iter != Prefix.rend(); Iter++) { + E = new UnaryExpression(*Iter, E); + } + return E; + } + + Expression* Parser::parseInfixOperatorAfterExpression(Expression* LHS, int MinPrecedence) { + for (;;) { + auto T0 = Tokens.peek(); + auto Info0 = ExprOperators.getInfix(T0); + if (!Info0 || Info0->Precedence < MinPrecedence) { + break; + } + Tokens.get(); + auto RHS = parseUnaryExpression(); + for (;;) { + auto T1 = Tokens.peek(); + auto Info1 = ExprOperators.getInfix(T1); + if (!Info1 || Info1->Precedence < Info0->Precedence && (Info1->Precedence > Info0->Precedence || Info1->isRightAssoc())) { + break; + } + RHS = parseInfixOperatorAfterExpression(RHS, Info1->Precedence); + } + LHS = new InfixExpression(LHS, T0, RHS); + } + return LHS; + } + + Expression* Parser::parseExpression() { + return parseInfixOperatorAfterExpression(parseUnaryExpression(), 0); + } + ExpressionStatement* Parser::parseExpressionStatement() { auto E = parseExpression(); BOLT_EXPECT_TOKEN(LineFoldEnd); @@ -146,7 +248,7 @@ after_params: case NodeType::BlockStart: { Tokens.get(); - std::vector Elements; + std::vector Elements; for (;;) { auto T3 = Tokens.peek(); if (T3->Type == NodeType::BlockEnd) { @@ -189,7 +291,7 @@ after_params: ); } - LetBodyElement* Parser::parseLetBodyElement() { + Node* Parser::parseLetBodyElement() { auto T0 = peekFirstTokenAfterModifiers(); switch (T0->Type) { case NodeType::LetKeyword: @@ -199,7 +301,7 @@ after_params: } } - SourceElement* Parser::parseSourceElement() { + Node* Parser::parseSourceElement() { auto T0 = peekFirstTokenAfterModifiers(); switch (T0->Type) { case NodeType::LetKeyword: @@ -210,7 +312,7 @@ after_params: } SourceFile* Parser::parseSourceFile() { - std::vector Elements; + std::vector Elements; for (;;) { auto T0 = Tokens.peek(); if (T0->Type == NodeType::EndOfFile) { diff --git a/src/Scanner.cc b/src/Scanner.cc index cbdcb041e..47a570cd4 100644 --- a/src/Scanner.cc +++ b/src/Scanner.cc @@ -22,6 +22,28 @@ namespace bolt { } } + static inline bool isOperatorPart(Char Chr) { + switch (Chr) { + case '+': + case '-': + case '*': + case '/': + case '^': + case '&': + case '|': + case '%': + case '$': + case '!': + case '?': + case '>': + case '<': + case '=': + return true; + default: + return false; + } + } + static bool isIdentifierPart(Char Chr) { return (Chr >= 65 && Chr <= 90) // Uppercase letter || (Chr >= 96 && Chr <= 122) // Lowercase letter @@ -48,18 +70,17 @@ namespace bolt { Token* Scanner::read() { + TextLoc StartLoc; Char C0; for (;;) { - C0 = peekChar(); + StartLoc = getCurrentLoc(); + C0 = getChar(); if (!isWhiteSpace(C0)) { break; } - getChar(); } - auto StartLoc = getCurrentLoc(); - switch (C0) { case static_cast(EOF): @@ -76,7 +97,6 @@ namespace bolt { case '8': case '9': { - getChar(); Integer I = toDigit(C0); for (;;) { auto C1 = peekChar(); @@ -156,7 +176,6 @@ digit_finish: case 'Z': case '_': { - getChar(); ByteString Text { static_cast(C0) }; for (;;) { auto C1 = peekChar(); @@ -188,7 +207,6 @@ digit_finish: case '"': { - getChar(); ByteString Text; bool Escaping = false; for (;;) { @@ -229,7 +247,6 @@ after_string_contents: case '.': { - getChar(); auto C1 = peekChar(); if (C1 == '.') { getChar(); @@ -242,8 +259,42 @@ after_string_contents: return new Dot(StartLoc); } -#define BOLT_SIMPLE_TOKEN(ch, name) case ch: getChar(); return new name(StartLoc); + case '+': + case '-': + case '*': + case '/': + case '^': + case '&': + case '|': + case '%': + case '$': + case '!': + case '?': + case '>': + case '<': + case '=': + { + ByteString Text { static_cast(C0) }; + for (;;) { + auto C1 = peekChar(); + if (!isOperatorPart(C1)) { + break; + } + Text.push_back(static_cast(C1)); + getChar(); + } + if (Text == "=") { + return new Equals(StartLoc); + } else if (Text.back() == '=' && Text[Text.size()-2] != '=') { + return new Assignment(Text.substr(0, Text.size()-1), StartLoc); + } + return new CustomOperator(Text, StartLoc); + } + +#define BOLT_SIMPLE_TOKEN(ch, name) case ch: return new name(StartLoc); + + //BOLT_SIMPLE_TOKEN(',', Comma) BOLT_SIMPLE_TOKEN(':', Colon) BOLT_SIMPLE_TOKEN('(', LParen) BOLT_SIMPLE_TOKEN(')', RParen) @@ -251,12 +302,13 @@ after_string_contents: BOLT_SIMPLE_TOKEN(']', RBracket) BOLT_SIMPLE_TOKEN('{', LBrace) BOLT_SIMPLE_TOKEN('}', RBrace) - BOLT_SIMPLE_TOKEN('=', Equals) default: + throw UnexpectedStringDiagnostic(StartLoc, String { C0 }); + // TODO Add a diagnostic message indicating that scanning failed. - return new Invalid(StartLoc); + //return new Invalid(StartLoc); } diff --git a/src/main.cc b/src/main.cc index 48289cae0..725488223 100644 --- a/src/main.cc +++ b/src/main.cc @@ -10,6 +10,7 @@ #include "bolt/Diagnostics.hpp" #include "bolt/Scanner.hpp" #include "bolt/Parser.hpp" +#include "bolt/Checker.hpp" using namespace bolt; @@ -124,6 +125,9 @@ int main(int argc, const char* argv[]) { SF = P.parseSourceFile(); #endif + Checker TheChecker; + TheChecker.check(SF); + return 0; }