Partially implement a type checker

This commit is contained in:
Sam Vervaeck 2022-08-21 16:25:52 +02:00
parent fee5085497
commit cd1e20d460
9 changed files with 912 additions and 56 deletions

View file

@ -13,6 +13,7 @@ add_executable(
src/Diagnostics.cc
src/Scanner.cc
src/Parser.cc
src/Checker.cc
src/main.cc
)
target_compile_options(

View file

@ -33,6 +33,7 @@ namespace bolt {
BlockEnd,
LineFoldEnd,
CustomOperator,
Assignment,
Identifier,
StringLiteral,
IntegerLiteral,
@ -42,6 +43,8 @@ namespace bolt {
ReferenceExpression,
ConstantExpression,
CallExpression,
InfixExpression,
UnaryExpression,
ExpressionStatement,
ReturnStatement,
TypeAssert,
@ -377,6 +380,19 @@ namespace bolt {
};
// Token for an operator that ends in '='. The scanner constructs it with the
// operator text minus the trailing '=' (see Scanner::read), and getText()
// appends the '=' again.
class Assignment : public Token {
public:
// Operator characters that precede the trailing '='.
ByteString Text;
Assignment(ByteString Text, TextLoc StartLoc): Token(NodeType::Assignment, StartLoc), Text(Text) {}
// Returns Text followed by "=".
std::string getText() const override;
~Assignment();
};
class Identifier : public Token {
public:
@ -433,24 +449,6 @@ namespace bolt {
};
class SourceElement : public Node {
public:
SourceElement(NodeType Type): Node(Type) {}
~SourceElement();
};
class LetBodyElement : public Node {
public:
LetBodyElement(NodeType Type): Node(Type) {}
~LetBodyElement();
};
class TypeExpression : public Node {
public:
@ -546,19 +544,54 @@ namespace bolt {
};
class Statement : public LetBodyElement {
class InfixExpression : public Expression {
public:
Statement(NodeType Type): LetBodyElement(Type) {}
Expression* LHS;
Token* Operator;
Expression* RHS;
InfixExpression(Expression* LHS, Token* Operator, Expression* RHS):
Expression(NodeType::InfixExpression),
LHS(LHS),
Operator(Operator),
RHS(RHS) {}
~InfixExpression();
};
// Expression node that applies a single operator token to one operand.
// The parser builds these for prefix operators (see Parser::parseUnaryExpression).
class UnaryExpression : public Expression {
public:
// The operator token being applied.
Token* Operator;
// The operand expression.
Expression* Argument;
UnaryExpression(
Token* Operator,
Expression* Argument
): Expression(NodeType::UnaryExpression),
Operator(Operator),
Argument(Argument) {}
// Calls unref() on Operator and Argument.
~UnaryExpression();
};
// Base class for statement nodes; it only forwards the concrete node type
// to Node.
class Statement : public Node {
public:
Statement(NodeType Type): Node(Type) {}
~Statement();
};
class ExpressionStatement : public Statement, public SourceElement {
class ExpressionStatement : public Statement {
public:
ExpressionStatement(Expression* Expression): Statement(NodeType::ExpressionStatement), SourceElement(NodeType::ExpressionStatement), Expression(Expression) {}
ExpressionStatement(Expression* Expression):
Statement(NodeType::ExpressionStatement), Expression(Expression) {}
Expression* Expression;
@ -615,10 +648,15 @@ namespace bolt {
class LetBlockBody : public LetBody {
public:
LetBlockBody(BlockStart* BlockStart, std::vector<LetBodyElement*> Elements): LetBody(NodeType::LetBlockBody), BlockStart(BlockStart), Elements(Elements) {}
LetBlockBody(
BlockStart* BlockStart,
std::vector<Node*> Elements
): LetBody(NodeType::LetBlockBody),
BlockStart(BlockStart),
Elements(Elements) {}
BlockStart* BlockStart;
std::vector<LetBodyElement*> Elements;
std::vector<Node*> Elements;
~LetBlockBody();
@ -641,7 +679,7 @@ namespace bolt {
};
class LetDeclaration : public SourceElement, public LetBodyElement {
class LetDeclaration : public Node {
public:
PubKeyword* PubKeyword;
@ -660,8 +698,7 @@ namespace bolt {
std::vector<Param*> Params,
class TypeAssert* TypeAssert,
LetBody* Body
): SourceElement(NodeType::LetDeclaration),
LetBodyElement(NodeType::LetDeclaration),
): Node(NodeType::LetDeclaration),
PubKeyword(PubKeyword),
LetKeywod(LetKeywod),
MutKeyword(MutKeyword),
@ -694,7 +731,7 @@ namespace bolt {
};
class StructDecl : public SourceElement {
class StructDecl : public Node {
public:
StructDecl(
@ -702,7 +739,7 @@ namespace bolt {
Identifier* Name,
Dot* Dot,
std::vector<StructDeclField*> Fields
): SourceElement(NodeType::StructDecl),
): Node(NodeType::StructDecl),
StructKeyword(StructKeyword),
Name(Name),
Dot(Dot),
@ -721,9 +758,10 @@ namespace bolt {
public:
SourceFile(std::vector<SourceElement*> Elements): Node(NodeType::SourceFile), Elements(Elements) {}
SourceFile(std::vector<Node*> Elements):
Node(NodeType::SourceFile), Elements(Elements) {}
std::vector<SourceElement*> Elements;
std::vector<Node*> Elements;
~SourceFile();

279
include/bolt/Checker.hpp Normal file
View file

@ -0,0 +1,279 @@
#pragma once
#include "zen/config.hpp"
#include "bolt/ByteString.hpp"
#include <cstddef>
#include <new>
#include <optional>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
namespace bolt {
class Node;
class Expression;
class SourceFile;
class Type;
class TVar;
using TVSub = std::unordered_map<TVar*, Type*>;
using TVSet = std::unordered_set<TVar*>;
// Discriminator stored in every Type; it tells which concrete subclass
// (TVar, TCon, TArrow or TAny) an object is, so code can static_cast safely.
enum class TypeKind : unsigned char {
Var,
Con,
Arrow,
Any,
};
/// Common base of every type known to the checker.
///
/// The class is not polymorphic: the concrete subclass is identified through
/// getKind() and reached with a static_cast.
class Type {

  const TypeKind Kind;

protected:

  Type(TypeKind Kind):
    Kind(Kind) {}

public:

  /// Tag identifying the concrete subclass of this object.
  TypeKind getKind() const noexcept {
    return Kind;
  }

  /// Whether a type variable with the same Id as TV occurs in this type.
  bool hasTypeVar(const TVar* TV);

  /// Returns this type with the type variables bound in Sub replaced.
  Type* substitute(const TVSub& Sub);

};
/// A type constructor identified by Id and applied to zero or more arguments.
class TCon : public Type {
public:

  /// Identity of the constructor; two TCon unify only when their Id matches.
  const size_t Id;

  /// Type arguments the constructor is applied to.
  std::vector<Type*> Args;

  TCon(size_t Id, std::vector<Type*> Args):
    Type(TypeKind::Con),
    Id(Id),
    Args(Args) {}

};
/// A type variable, distinguished from other variables by its Id.
class TVar : public Type {
public:

  /// Number handed out by Checker::createTypeVar().
  const size_t Id;

  TVar(size_t Id):
    Type(TypeKind::Var),
    Id(Id) {}

};
/// A function type: a list of parameter types and one return type.
class TArrow : public Type {
public:

  /// Types of the parameters, in declaration order.
  std::vector<Type*> ParamTypes;

  /// Type produced when the function is applied.
  Type* ReturnType;

  TArrow(std::vector<Type*> ParamTypes, Type* ReturnType):
    Type(TypeKind::Arrow),
    ParamTypes(ParamTypes),
    ReturnType(ReturnType) {}

};
/// Placeholder type. The checker returns it when a name cannot be resolved
/// (see Checker::inferExpression).
class TAny : public Type {
public:

  TAny():
    Type(TypeKind::Any) {}

};
// template<typename T>
// struct DerefHash {
// std::size_t operator()(const T& Value) const noexcept {
// return std::hash<decltype(*Value)>{}(*Value);
// }
// };
class Constraint;
// Payload of a Forall scheme: a type together with the type variables that
// are replaced by fresh ones on instantiation (see Checker::instantiate).
class Forall {
public:
// Type variables that get replaced when the scheme is instantiated.
TVSet TVs;
// NOTE(review): the name is misspelled ("Constraints"). Renaming it would
// change the public interface, so it is left as-is here.
std::vector<Constraint*> Constriants;
// The type that instantiation substitutes into.
Type* Type;
};
// Discriminator for the union stored inside Scheme. Forall is the only
// alternative at the moment.
enum class SchemeKind : unsigned char {
Forall,
};
class Scheme {
const SchemeKind Kind;
union {
Forall F;
};
public:
inline Scheme(Forall F):
Kind(SchemeKind::Forall), F(F) {}
inline Scheme(const Scheme& Other):
Kind(Other.Kind) {
switch (Kind) {
case SchemeKind::Forall:
F = Other.F;
break;
}
}
inline Scheme(Scheme&& Other):
Kind(std::move(Other.Kind)) {
switch (Kind) {
case SchemeKind::Forall:
F = std::move(Other.F);
break;
}
}
template<typename T>
T& as();
template<>
Forall& as<Forall>() {
ZEN_ASSERT(Kind == SchemeKind::Forall);
return F;
}
inline SchemeKind getKind() const noexcept {
return Kind;
}
~Scheme() {
switch (Kind) {
case SchemeKind::Forall:
F.~Forall();
break;
}
}
};
/// Maps names to the type schemes that are in scope.
class TypeEnv {

  std::unordered_map<ByteString, Scheme> Mapping;

public:

  /// Returns the scheme registered under Name, or nullptr when there is none.
  Scheme* lookup(ByteString Name);

  /// Like lookup(), but returns the type of a scheme that has no type
  /// variables to instantiate. Returns nullptr when Name is unknown.
  Type* lookupMono(ByteString Name);

  /// Registers S under Name.
  /// NOTE(review): no definition of this member is visible in this change.
  void add(ByteString Name, Scheme S);

};
// Discriminator stored in every Constraint; it identifies the concrete
// subclass (CEqual, CMany or CEmpty).
enum class ConstraintKind {
Equal,
Many,
Empty,
};
/// Base class of the constraints produced during inference. The concrete
/// subclass is identified by getKind().
class Constraint {

  const ConstraintKind Kind;

public:

  Constraint(ConstraintKind Kind):
    Kind(Kind) {}

  virtual ~Constraint() = default;

  /// Tag identifying the concrete subclass of this object.
  ConstraintKind getKind() const noexcept {
    return Kind;
  }

};
using ConstraintSet = std::vector<Constraint*>;
/// Constraint stating that Left and Right must unify.
class CEqual : public Constraint {
public:

  Type* Left;
  Type* Right;

  CEqual(Type* Left, Type* Right):
    Constraint(ConstraintKind::Equal),
    Left(Left),
    Right(Right) {}

};
/// Constraint that groups a list of other constraints.
class CMany : public Constraint {
public:

  /// The grouped constraints; the solver visits each of them.
  ConstraintSet Constraints;

  CMany(ConstraintSet Constraints):
    Constraint(ConstraintKind::Many),
    Constraints(Constraints) {}

};
/// Constraint that carries no requirement; the solver skips it.
class CEmpty : public Constraint {
public:

  CEmpty():
    Constraint(ConstraintKind::Empty) {}

};
/// State threaded through inference: the environment used for lookups and
/// the list that collects generated constraints. Both are held by reference,
/// so the referenced objects must outlive the context.
class InferContext {

  ConstraintSet& Constraints;

public:

  TypeEnv& Env;

  InferContext(ConstraintSet& Constraints, TypeEnv& Env):
    Constraints(Constraints),
    Env(Env) {}

  /// Appends C to the collected constraints.
  void addConstraint(Constraint* C);

};
/// Type checker: walks a source file, gathers constraints, and solves them.
class Checker {

  /// Id that the next call to createTypeVar() hands out.
  size_t nextTypeVarId = 0;

  /// Allocates a type variable with a previously unused Id.
  TVar* createTypeVar();

  /// Replaces the type variables of S by fresh ones and returns the result.
  Type* instantiate(Scheme& S);

  /// Visits a declaration or statement, collecting constraints in Env.
  void infer(Node* node, InferContext& Env);

  /// Returns the type of Expression, collecting constraints in Env.
  Type* inferExpression(Expression* Expression, InferContext& Env);

  /// Tries to make A and B equal, recording variable bindings in Solution.
  bool unify(Type* A, Type* B, TVSub& Solution);

  /// Processes Constraint and everything nested inside it.
  void solve(Constraint* Constraint);

public:

  /// Entry point: checks a whole source file.
  void check(SourceFile* SF);

};
}

View file

@ -1,18 +1,73 @@
#pragma once
#include <unordered_map>
#include <optional>
#include "bolt/CST.hpp"
namespace bolt {
class Scanner;
/// Bit flags describing the positions in which an operator may appear.
enum OperatorFlags {
  OperatorFlags_Prefix = 1,
  OperatorFlags_Suffix = 2,
  OperatorFlags_InfixL = 4,
  OperatorFlags_InfixR = 8,
};

/// Precedence and fixity of one operator, as stored in an OperatorTable.
struct OperatorInfo {

  /// Binding strength; the parser registers tighter operators with larger values.
  int Precedence;

  /// Bitwise OR of OperatorFlags values.
  unsigned Flags;

  /// True when the operator may be used in front of its operand.
  bool isPrefix() const noexcept {
    return (Flags & OperatorFlags_Prefix) != 0;
  }

  /// True when the operator may be used after its operand.
  bool isSuffix() const noexcept {
    return (Flags & OperatorFlags_Suffix) != 0;
  }

  /// True when the operator may be used between two operands.
  bool isInfix() const noexcept {
    const unsigned AnyInfix = OperatorFlags_InfixL | OperatorFlags_InfixR;
    return (Flags & AnyInfix) != 0;
  }

  /// True when the operator is a right-associative infix operator.
  bool isRightAssoc() const noexcept {
    return (Flags & OperatorFlags_InfixR) != 0;
  }

};
/// Table of known operators, keyed by the operator's text.
class OperatorTable {

  std::unordered_map<std::string, OperatorInfo> Mapping;

public:

  /// Registers an operator. Flags is a bitwise OR of OperatorFlags values.
  void add(std::string Name, unsigned Flags, int Precedence);

  /// Whether the text of T names a registered prefix operator.
  bool isPrefix(Token* T);

  /// Whether the text of T names a registered infix operator.
  bool isInfix(Token* T);

  /// Whether the text of T names a registered suffix operator.
  bool isSuffix(Token* T);

  /// Returns the entry for T when its text names a registered infix operator.
  std::optional<OperatorInfo> getInfix(Token* T);

};
class Parser {
Stream<Token*>& Tokens;
OperatorTable ExprOperators;
Token* peekFirstTokenAfterModifiers();
Expression* parseInfixOperatorAfterExpression(Expression* LHS, int MinPrecedence);
public:
Parser(Stream<Token*>& S);
@ -27,15 +82,21 @@ namespace bolt {
ReferenceExpression* parseReferenceExpression();
Expression* parseUnaryExpression();
Expression* parsePrimitiveExpression();
Expression* parseExpression();
Expression* parseCallExpression();
ExpressionStatement* parseExpressionStatement();
LetBodyElement* parseLetBodyElement();
Node* parseLetBodyElement();
LetDeclaration* parseLetDeclaration();
SourceElement* parseSourceElement();
Node* parseSourceElement();
SourceFile* parseSourceFile();

View file

@ -77,6 +77,9 @@ namespace bolt {
CustomOperator::~CustomOperator() {
}
// Nothing to release explicitly; the body is intentionally empty.
Assignment::~Assignment() {
}
Identifier::~Identifier() {
}
@ -93,12 +96,6 @@ namespace bolt {
Name->unref();
}
SourceElement::~SourceElement() {
}
LetBodyElement::~LetBodyElement() {
}
TypeExpression::~TypeExpression() {
}
@ -131,6 +128,17 @@ namespace bolt {
}
}
// Calls unref() on both operands and on the operator token.
// NOTE(review): presumably unref() drops one reference count — confirm in Node.
InfixExpression::~InfixExpression() {
LHS->unref();
Operator->unref();
RHS->unref();
}
// Calls unref() on the operator token and on the operand.
// NOTE(review): presumably unref() drops one reference count — confirm in Node.
UnaryExpression::~UnaryExpression() {
Operator->unref();
Argument->unref();
}
Statement::~Statement() {
}
@ -297,6 +305,10 @@ namespace bolt {
return Text;
}
// Returns the stored operator text with "=" appended. The scanner strips the
// trailing '=' before constructing the token, so this restores it.
std::string Assignment::getText() const {
return Text + "=";
}
std::string Identifier::getText() const {
return Text;
}

307
src/Checker.cc Normal file
View file

@ -0,0 +1,307 @@
#include <stack>
#include "zen/config.hpp"
#include "bolt/CST.hpp"
#include "bolt/Checker.hpp"
namespace bolt {
/// Returns a pointer to the scheme stored under Name, or nullptr when the
/// environment has no entry for it. The pointer refers into the map.
Scheme* TypeEnv::lookup(ByteString Name) {
  auto Entry = Mapping.find(Name);
  if (Entry != Mapping.end()) {
    return &Entry->second;
  }
  return nullptr;
}
/// Returns the type stored under Name, or nullptr when Name is unknown.
/// Asserts that the scheme found has no type variables to instantiate.
Type* TypeEnv::lookupMono(ByteString Name) {
  auto Entry = Mapping.find(Name);
  if (Entry != Mapping.end()) {
    auto& Payload = Entry->second.as<Forall>();
    ZEN_ASSERT(Payload.TVs.empty());
    return Payload.Type;
  }
  return nullptr;
}
bool Type::hasTypeVar(const TVar* TV) {
switch (Kind) {
case TypeKind::Var:
return static_cast<TVar*>(this)->Id == TV->Id;
case TypeKind::Arrow:
{
auto Y = static_cast<TArrow*>(this);
for (auto Ty: Y->ParamTypes) {
if (Ty->hasTypeVar(TV)) {
return true;
}
}
return Y->ReturnType->hasTypeVar(TV);
}
}
}
/// Returns this type with every type variable that is bound in Sub replaced
/// by the type it is bound to.
///
/// Variables and TAny are returned as-is when nothing applies. Arrow and
/// constructor types are always rebuilt as newly allocated objects.
Type* Type::substitute(const TVSub &Sub) {
  switch (Kind) {
    case TypeKind::Var:
      {
        auto Y = static_cast<TVar*>(this);
        auto Match = Sub.find(Y);
        return Match != Sub.end() ? Match->second : Y;
      }
    case TypeKind::Arrow:
      {
        auto Y = static_cast<TArrow*>(this);
        std::vector<Type*> NewParamTypes;
        NewParamTypes.reserve(Y->ParamTypes.size());
        for (auto Ty: Y->ParamTypes) {
          NewParamTypes.push_back(Ty->substitute(Sub));
        }
        auto NewRetTy = Y->ReturnType->substitute(Sub);
        return new TArrow(NewParamTypes, NewRetTy);
      }
    case TypeKind::Any:
      return this;
    case TypeKind::Con:
      {
        auto Y = static_cast<TCon*>(this);
        std::vector<Type*> NewArgs;
        NewArgs.reserve(Y->Args.size());
        for (auto Arg: Y->Args) {
          NewArgs.push_back(Arg->substitute(Sub));
        }
        // Build the result from the substituted arguments, not from the
        // original ones; otherwise the substitution would be lost.
        return new TCon(Y->Id, NewArgs);
      }
  }
  // Every TypeKind is handled above.
  ZEN_UNREACHABLE
}
/// Records C at the end of the constraint list this context writes to.
void InferContext::addConstraint(Constraint *C) {
  Constraints.emplace_back(C);
}
/// Visits a non-expression node and collects the constraints it gives rise to.
///
/// Source files are traversed element by element, expression statements
/// delegate to inferExpression(), and let-declarations are not handled yet.
/// Any other node kind is treated as unreachable.
void Checker::infer(Node* X, InferContext& Ctx) {
  switch (X->Type) {

    case NodeType::SourceFile:
      for (auto Child: static_cast<SourceFile*>(X)->Elements) {
        infer(Child, Ctx);
      }
      break;

    case NodeType::LetDeclaration:
      // TODO
      break;

    case NodeType::ExpressionStatement:
      // The resulting type is not needed for a statement.
      inferExpression(static_cast<ExpressionStatement*>(X)->Expression, Ctx);
      break;

    default:
      ZEN_UNREACHABLE

  }
}
/// Allocates a new type variable and advances the Id counter.
TVar* Checker::createTypeVar() {
  const auto Id = nextTypeVarId++;
  return new TVar(Id);
}
/// Produces a fresh instance of S: every type variable listed in the scheme
/// is mapped to a newly created one and the mapping is applied to its type.
Type* Checker::instantiate(Scheme& S) {
  switch (S.getKind()) {
    case SchemeKind::Forall:
      {
        TVSub Fresh;
        auto& Payload = S.as<Forall>();
        for (auto Bound: Payload.TVs) {
          Fresh[Bound] = createTypeVar();
        }
        return Payload.Type->substitute(Fresh);
      }
  }
}
/// Computes the type of an expression, adding constraints to Ctx as needed.
///
/// - Literals yield the environment's "Int" or "String" type. lookupMono()
///   returns nullptr when that name is not registered.
/// - References yield a fresh instance of the scheme bound to the name.
/// - Infix expressions yield a fresh type variable, constrained so that the
///   operator's type equals a function from both operand types to it.
///
/// Unknown names produce a TAny. Other expression kinds are not handled.
Type* Checker::inferExpression(Expression* X, InferContext& Ctx) {
  switch (X->Type) {

    case NodeType::ConstantExpression:
      {
        auto Literal = static_cast<ConstantExpression*>(X);
        switch (Literal->Token->Type) {
          case NodeType::IntegerLiteral:
            return Ctx.Env.lookupMono("Int");
          case NodeType::StringLiteral:
            return Ctx.Env.lookupMono("String");
          default:
            ZEN_UNREACHABLE
        }
      }

    case NodeType::ReferenceExpression:
      {
        auto Ref = static_cast<ReferenceExpression*>(X);
        auto Found = Ctx.Env.lookup(Ref->Name->Text);
        if (Found != nullptr) {
          return instantiate(*Found);
        }
        // TODO add diagnostic
        return new TAny();
      }

    case NodeType::InfixExpression:
      {
        auto Infix = static_cast<InfixExpression*>(X);
        auto Found = Ctx.Env.lookup(Infix->Operator->getText());
        if (Found == nullptr) {
          // TODO add diagnostic
          return new TAny();
        }
        // Keep this order: each step may allocate type variable Ids.
        auto OperatorType = instantiate(*Found);
        auto ResultType = createTypeVar();
        std::vector<Type*> OperandTypes;
        OperandTypes.push_back(inferExpression(Infix->LHS, Ctx));
        OperandTypes.push_back(inferExpression(Infix->RHS, Ctx));
        auto Expected = new TArrow(OperandTypes, ResultType);
        Ctx.addConstraint(new CEqual(Expected, OperatorType));
        return ResultType;
      }

    default:
      ZEN_UNREACHABLE

  }
}
/// Type-checks a source file: collects constraints from every element and
/// then hands them to the solver.
///
/// NOTE(review): the global environment is created empty here and nothing
/// visible in this file adds to it, so lookups such as "Int" return nullptr.
void Checker::check(SourceFile *SF) {
  TypeEnv Global;
  ConstraintSet Constraints;
  InferContext Toplevel { Constraints, Global };
  infer(SF, Toplevel);
  // The root constraint is only needed while solving, so it lives on the
  // stack instead of being heap-allocated and never released.
  CMany Root(Constraints);
  solve(&Root);
}
/// Solves Constraint and everything nested inside it.
///
/// Constraints are processed from an explicit work stack: CMany pushes its
/// children, CEqual is unified, CEmpty is skipped. A failed unification is
/// reported on stderr and solving continues. The resulting substitution is
/// local to this function and currently not returned to the caller.
void Checker::solve(Constraint* Constraint) {

  std::stack<class Constraint*> Queue;
  TVSub Sub;

  // Seed the work stack; without this the loop below never runs.
  if (Constraint != nullptr) {
    Queue.push(Constraint);
  }

  while (!Queue.empty()) {

    auto Current = Queue.top();
    Queue.pop();

    switch (Current->getKind()) {

      case ConstraintKind::Empty:
        break;

      case ConstraintKind::Many:
        {
          auto Y = static_cast<CMany*>(Current);
          for (auto Child: Y->Constraints) {
            Queue.push(Child);
          }
          break;
        }

      case ConstraintKind::Equal:
        {
          auto Y = static_cast<CEqual*>(Current);
          if (!unify(Y->Left, Y->Right, Sub)) {
            // TODO diagnostic
            fprintf(stderr, "unification error\n");
          }
          break;
        }

    }

  }

}
/// Tries to make A and B equal by binding type variables in Solution.
///
/// Returns true on success and false when the two types cannot be made
/// equal: different constructors, different arity, or different kinds.
/// Bindings made before a failure is detected stay in Solution.
///
/// NOTE(review): TAny never unifies with anything but itself here; confirm
/// whether it is meant to be compatible with every type.
bool Checker::unify(Type* A, Type* B, TVSub& Solution) {

  // Follow variable bindings to the end of the chain. Stopping after one
  // step could hand back a variable that is itself already bound, and
  // binding it again would silently overwrite the earlier binding.
  auto Resolve = [&Solution](Type* Ty) -> Type* {
    while (Ty->getKind() == TypeKind::Var) {
      auto Match = Solution.find(static_cast<TVar*>(Ty));
      if (Match == Solution.end()) {
        break;
      }
      Ty = Match->second;
    }
    return Ty;
  };

  A = Resolve(A);
  B = Resolve(B);

  // Identical objects are trivially equal. This also keeps a variable from
  // being bound to itself.
  if (A == B) {
    return true;
  }

  if (A->getKind() == TypeKind::Var) {
    auto Y = static_cast<TVar*>(A);
    if (B->hasTypeVar(Y)) {
      // TODO occurs check
    }
    Solution[Y] = B;
    return true;
  }

  if (B->getKind() == TypeKind::Var) {
    return unify(B, A, Solution);
  }

  if (A->getKind() == TypeKind::Arrow && B->getKind() == TypeKind::Arrow) {
    auto Y = static_cast<TArrow*>(A);
    auto Z = static_cast<TArrow*>(B);
    if (Y->ParamTypes.size() != Z->ParamTypes.size()) {
      // TODO diagnostic
      return false;
    }
    auto Count = Y->ParamTypes.size();
    for (std::size_t I = 0; I < Count; I++) {
      if (!unify(Y->ParamTypes[I], Z->ParamTypes[I], Solution)) {
        return false;
      }
    }
    return unify(Y->ReturnType, Z->ReturnType, Solution);
  }

  // Both sides must be constructors before either is treated as a TCon.
  if (A->getKind() == TypeKind::Con && B->getKind() == TypeKind::Con) {
    auto Y = static_cast<TCon*>(A);
    auto Z = static_cast<TCon*>(B);
    if (Y->Id != Z->Id) {
      // TODO diagnostic
      return false;
    }
    ZEN_ASSERT(Y->Args.size() == Z->Args.size());
    auto Count = Y->Args.size();
    for (std::size_t I = 0; I < Count; I++) {
      if (!unify(Y->Args[I], Z->Args[I], Solution)) {
        return false;
      }
    }
    return true;
  }

  // TODO diagnostic
  return false;
}
}

View file

@ -3,11 +3,54 @@
#include "bolt/Scanner.hpp"
#include "bolt/Parser.hpp"
#include "bolt/Diagnostics.hpp"
#include <vector>
namespace bolt {
/// Returns the table entry for the text of T when it is registered as an
/// infix operator, and an empty optional otherwise.
std::optional<OperatorInfo> OperatorTable::getInfix(Token* T) {
  auto Entry = Mapping.find(T->getText());
  if (Entry != Mapping.end() && Entry->second.isInfix()) {
    return Entry->second;
  }
  return std::nullopt;
}
/// Whether the text of T is registered as an infix operator.
bool OperatorTable::isInfix(Token* T) {
  auto Entry = Mapping.find(T->getText());
  if (Entry == Mapping.end()) {
    return false;
  }
  return Entry->second.isInfix();
}
/// Whether the text of T is registered as a prefix operator.
bool OperatorTable::isPrefix(Token* T) {
  auto Entry = Mapping.find(T->getText());
  if (Entry == Mapping.end()) {
    return false;
  }
  return Entry->second.isPrefix();
}
/// Whether the text of T is registered as a suffix operator.
bool OperatorTable::isSuffix(Token* T) {
  auto Entry = Mapping.find(T->getText());
  if (Entry == Mapping.end()) {
    return false;
  }
  return Entry->second.isSuffix();
}
void OperatorTable::add(std::string Name, unsigned Flags, int Precedence) {
Mapping.emplace(Name, OperatorInfo { Precedence, Flags });
}
Parser::Parser(Stream<Token*>& S):
Tokens(S) {}
Tokens(S) {
ExprOperators.add("**", OperatorFlags_InfixR, 10);
ExprOperators.add("*", OperatorFlags_InfixL, 5);
ExprOperators.add("/", OperatorFlags_InfixL, 5);
ExprOperators.add("+", OperatorFlags_InfixL, 4);
ExprOperators.add("-", OperatorFlags_InfixL, 4);
ExprOperators.add("<", OperatorFlags_InfixL, 3);
ExprOperators.add(">", OperatorFlags_InfixL, 3);
ExprOperators.add("<=", OperatorFlags_InfixL, 3);
ExprOperators.add(">=", OperatorFlags_InfixL, 3);
ExprOperators.add("==", OperatorFlags_InfixL, 3);
ExprOperators.add("!=", OperatorFlags_InfixL, 3);
ExprOperators.add(":", OperatorFlags_InfixL, 2);
ExprOperators.add("<|>", OperatorFlags_InfixL, 1);
ExprOperators.add("$", OperatorFlags_InfixR, 0);
}
Token* Parser::peekFirstTokenAfterModifiers() {
std::size_t I = 0;
@ -73,7 +116,7 @@ namespace bolt {
}
}
Expression* Parser::parseExpression() {
Expression* Parser::parsePrimitiveExpression() {
auto T0 = Tokens.peek();
switch (T0->Type) {
case NodeType::Identifier:
@ -88,6 +131,65 @@ namespace bolt {
}
}
/// Parses a primitive expression followed by zero or more argument
/// expressions. Arguments are read until the end of the line fold or an
/// infix operator is seen. Without arguments the primitive expression itself
/// is returned; otherwise a CallExpression is built.
Expression* Parser::parseCallExpression() {
  auto Callee = parsePrimitiveExpression();
  std::vector<Expression*> Arguments;
  auto Next = Tokens.peek();
  while (Next->Type != NodeType::LineFoldEnd && !ExprOperators.isInfix(Next)) {
    Arguments.push_back(parsePrimitiveExpression());
    Next = Tokens.peek();
  }
  if (!Arguments.empty()) {
    return new CallExpression(Callee, Arguments);
  }
  return Callee;
}
/// Parses any number of prefix operators followed by a call expression.
/// The operators are applied innermost-first, so the operator written
/// closest to the operand wraps it first.
Expression* Parser::parseUnaryExpression() {
  std::vector<Token*> Operators;
  for (auto Next = Tokens.peek(); ExprOperators.isPrefix(Next); Next = Tokens.peek()) {
    Tokens.get();
    Operators.push_back(Next);
  }
  auto Result = parseCallExpression();
  for (auto Remaining = Operators.size(); Remaining > 0; Remaining--) {
    Result = new UnaryExpression(Operators[Remaining - 1], Result);
  }
  return Result;
}
/// Precedence-climbing loop for binary operators.
///
/// LHS is the operand parsed so far. Infix operators whose precedence is at
/// least MinPrecedence are consumed and folded into an InfixExpression tree.
/// A higher precedence binds tighter; operators of equal precedence group to
/// the left unless they are registered as right-associative.
Expression* Parser::parseInfixOperatorAfterExpression(Expression* LHS, int MinPrecedence) {
  for (;;) {
    auto T0 = Tokens.peek();
    auto Info0 = ExprOperators.getInfix(T0);
    if (!Info0 || Info0->Precedence < MinPrecedence) {
      break;
    }
    Tokens.get();
    auto RHS = parseUnaryExpression();
    for (;;) {
      auto T1 = Tokens.peek();
      auto Info1 = ExprOperators.getInfix(T1);
      if (!Info1) {
        break;
      }
      // The next operator only takes over the right operand when it binds
      // tighter than the current one, or when it has the same precedence and
      // is right-associative. In every other case the current operator is
      // completed first.
      const bool BindsTighter = Info1->Precedence > Info0->Precedence;
      const bool SameLevelRightAssoc =
        Info1->Precedence == Info0->Precedence && Info1->isRightAssoc();
      if (!BindsTighter && !SameLevelRightAssoc) {
        break;
      }
      RHS = parseInfixOperatorAfterExpression(RHS, Info1->Precedence);
    }
    LHS = new InfixExpression(LHS, T0, RHS);
  }
  return LHS;
}
/// Parses a full expression: one unary expression followed by any chain of
/// infix operators, starting at the lowest precedence level.
Expression* Parser::parseExpression() {
  auto First = parseUnaryExpression();
  return parseInfixOperatorAfterExpression(First, 0);
}
ExpressionStatement* Parser::parseExpressionStatement() {
auto E = parseExpression();
BOLT_EXPECT_TOKEN(LineFoldEnd);
@ -146,7 +248,7 @@ after_params:
case NodeType::BlockStart:
{
Tokens.get();
std::vector<LetBodyElement*> Elements;
std::vector<Node*> Elements;
for (;;) {
auto T3 = Tokens.peek();
if (T3->Type == NodeType::BlockEnd) {
@ -189,7 +291,7 @@ after_params:
);
}
LetBodyElement* Parser::parseLetBodyElement() {
Node* Parser::parseLetBodyElement() {
auto T0 = peekFirstTokenAfterModifiers();
switch (T0->Type) {
case NodeType::LetKeyword:
@ -199,7 +301,7 @@ after_params:
}
}
SourceElement* Parser::parseSourceElement() {
Node* Parser::parseSourceElement() {
auto T0 = peekFirstTokenAfterModifiers();
switch (T0->Type) {
case NodeType::LetKeyword:
@ -210,7 +312,7 @@ after_params:
}
SourceFile* Parser::parseSourceFile() {
std::vector<SourceElement*> Elements;
std::vector<Node*> Elements;
for (;;) {
auto T0 = Tokens.peek();
if (T0->Type == NodeType::EndOfFile) {

View file

@ -22,6 +22,28 @@ namespace bolt {
}
}
/// Whether Chr is one of the characters an operator token may consist of.
static inline bool isOperatorPart(Char Chr) {
  static constexpr char Parts[] = {
    '+', '-', '*', '/', '^', '&', '|',
    '%', '$', '!', '?', '>', '<', '=',
  };
  for (const char Part: Parts) {
    if (Chr == static_cast<Char>(Part)) {
      return true;
    }
  }
  return false;
}
static bool isIdentifierPart(Char Chr) {
return (Chr >= 65 && Chr <= 90) // Uppercase letter
|| (Chr >= 96 && Chr <= 122) // Lowercase letter
@ -48,18 +70,17 @@ namespace bolt {
Token* Scanner::read() {
TextLoc StartLoc;
Char C0;
for (;;) {
C0 = peekChar();
StartLoc = getCurrentLoc();
C0 = getChar();
if (!isWhiteSpace(C0)) {
break;
}
getChar();
}
auto StartLoc = getCurrentLoc();
switch (C0) {
case static_cast<Char>(EOF):
@ -76,7 +97,6 @@ namespace bolt {
case '8':
case '9':
{
getChar();
Integer I = toDigit(C0);
for (;;) {
auto C1 = peekChar();
@ -156,7 +176,6 @@ digit_finish:
case 'Z':
case '_':
{
getChar();
ByteString Text { static_cast<char>(C0) };
for (;;) {
auto C1 = peekChar();
@ -188,7 +207,6 @@ digit_finish:
case '"':
{
getChar();
ByteString Text;
bool Escaping = false;
for (;;) {
@ -229,7 +247,6 @@ after_string_contents:
case '.':
{
getChar();
auto C1 = peekChar();
if (C1 == '.') {
getChar();
@ -242,8 +259,42 @@ after_string_contents:
return new Dot(StartLoc);
}
#define BOLT_SIMPLE_TOKEN(ch, name) case ch: getChar(); return new name(StartLoc);
case '+':
case '-':
case '*':
case '/':
case '^':
case '&':
case '|':
case '%':
case '$':
case '!':
case '?':
case '>':
case '<':
case '=':
{
ByteString Text { static_cast<char>(C0) };
for (;;) {
auto C1 = peekChar();
if (!isOperatorPart(C1)) {
break;
}
Text.push_back(static_cast<char>(C1));
getChar();
}
if (Text == "=") {
return new Equals(StartLoc);
} else if (Text.back() == '=' && Text[Text.size()-2] != '=') {
return new Assignment(Text.substr(0, Text.size()-1), StartLoc);
}
return new CustomOperator(Text, StartLoc);
}
#define BOLT_SIMPLE_TOKEN(ch, name) case ch: return new name(StartLoc);
//BOLT_SIMPLE_TOKEN(',', Comma)
BOLT_SIMPLE_TOKEN(':', Colon)
BOLT_SIMPLE_TOKEN('(', LParen)
BOLT_SIMPLE_TOKEN(')', RParen)
@ -251,12 +302,13 @@ after_string_contents:
BOLT_SIMPLE_TOKEN(']', RBracket)
BOLT_SIMPLE_TOKEN('{', LBrace)
BOLT_SIMPLE_TOKEN('}', RBrace)
BOLT_SIMPLE_TOKEN('=', Equals)
default:
throw UnexpectedStringDiagnostic(StartLoc, String { C0 });
// TODO Add a diagnostic message indicating that scanning failed.
return new Invalid(StartLoc);
//return new Invalid(StartLoc);
}

View file

@ -10,6 +10,7 @@
#include "bolt/Diagnostics.hpp"
#include "bolt/Scanner.hpp"
#include "bolt/Parser.hpp"
#include "bolt/Checker.hpp"
using namespace bolt;
@ -124,6 +125,9 @@ int main(int argc, const char* argv[]) {
SF = P.parseSourceFile();
#endif
Checker TheChecker;
TheChecker.check(SF);
return 0;
}