From b6706dd8018d60c0087aa835393a6c1af7ad0ea0 Mon Sep 17 00:00:00 2001 From: Sam Vervaeck Date: Sun, 21 May 2023 14:50:28 +0200 Subject: [PATCH] Fix constructors like True and False not being parsed --- include/bolt/CST.hpp | 32 +++++++++++++++++++++++++------- src/CST.cc | 12 ++++++++++-- src/Checker.cc | 34 +++++++++++++++++----------------- src/Diagnostics.cc | 4 +++- src/Parser.cc | 12 +++++++++--- 5 files changed, 64 insertions(+), 30 deletions(-) diff --git a/include/bolt/CST.hpp b/include/bolt/CST.hpp index cfb896ab2..13e15fd80 100644 --- a/include/bolt/CST.hpp +++ b/include/bolt/CST.hpp @@ -707,13 +707,30 @@ namespace bolt { }; - class Identifier : public Token { + class Symbol : public Token { + public: + + inline Symbol(NodeKind Kind, TextLoc StartLoc): + Token(Kind, StartLoc) {} + + virtual ByteString getCanonicalText() = 0; + + static bool classof(const Node* N) { + return N->getKind() == NodeKind::Identifier + || N->getKind() == NodeKind::IdentifierAlt; + } + + }; + + class Identifier : public Symbol { public: ByteString Text; Identifier(ByteString Text, TextLoc StartLoc): - Token(NodeKind::Identifier, StartLoc), Text(Text) {} + Symbol(NodeKind::Identifier, StartLoc), Text(Text) {} + + ByteString getCanonicalText() override; std::string getText() const override; @@ -725,13 +742,15 @@ namespace bolt { }; - class IdentifierAlt : public Token { + class IdentifierAlt : public Symbol { public: ByteString Text; IdentifierAlt(ByteString Text, TextLoc StartLoc): - Token(NodeKind::IdentifierAlt, StartLoc), Text(Text) {} + Symbol(NodeKind::IdentifierAlt, StartLoc), Text(Text) {} + + ByteString getCanonicalText() override; std::string getText() const override; @@ -970,11 +989,11 @@ namespace bolt { public: std::vector> ModulePath; - Identifier* Name; + Symbol* Name; ReferenceExpression( std::vector> ModulePath, - Identifier* Name + Symbol* Name ): Expression(NodeKind::ReferenceExpression), ModulePath(ModulePath), Name(Name) {} @@ -1007,7 +1026,6 @@ namespace bolt { }; - class MatchExpression : public Expression { public: diff --git a/src/CST.cc b/src/CST.cc index d9f8899f2..25d0bce78 100644 --- a/src/CST.cc +++ b/src/CST.cc @@ -605,12 +605,20 @@ namespace bolt { return "instance"; } + ByteString Identifier::getCanonicalText() { + return Text; + } + + ByteString IdentifierAlt::getCanonicalText() { + return Text; + } + SymbolPath ReferenceExpression::getSymbolPath() const { std::vector ModuleNames; for (auto [Name, Dot]: ModulePath) { - ModuleNames.push_back(Name->Text); + ModuleNames.push_back(Name->getCanonicalText()); } - return SymbolPath { ModuleNames, Name->Text }; + return SymbolPath { ModuleNames, Name->getCanonicalText() }; } } diff --git a/src/Checker.cc b/src/Checker.cc index a79931925..33df0acfd 100644 --- a/src/Checker.cc +++ b/src/Checker.cc @@ -331,8 +331,8 @@ namespace bolt { { auto Class = static_cast(X); for (auto TE: Class->TypeVars) { - auto TV = createRigidVar(TE->Name->Text); - TV->Contexts.emplace(Class->Name->Text); + auto TV = createRigidVar(TE->Name->getCanonicalText()); + TV->Contexts.emplace(Class->Name->getCanonicalText()); TE->setType(TV); } for (auto Element: Class->Elements) { @@ -344,9 +344,9 @@ namespace bolt { case NodeKind::InstanceDeclaration: { auto Decl = static_cast(X); - auto Match = InstanceMap.find(Decl->Name->Text); + auto Match = InstanceMap.find(Decl->Name->getCanonicalText()); if (Match == InstanceMap.end()) { - InstanceMap.emplace(Decl->Name->Text, std::vector { Decl }); + InstanceMap.emplace(Decl->Name->getCanonicalText(), std::vector { Decl }); } else { Match->second.push_back(Decl); } @@ -376,7 +376,7 @@ namespace bolt { auto Decl = static_cast(Let->Parent); for (auto TE: Decl->TypeVars) { auto TV = llvm::cast(TE->getType()); - NewCtx->Env.emplace(TE->Name->Text, new Forall(TV)); + NewCtx->Env.emplace(TE->Name->getCanonicalText(), new Forall(TV)); NewCtx->TVs->emplace(TV); } } @@ -617,7 +617,7 @@ namespace bolt { for (auto TE: D->TEs) { Types.push_back(inferTypeExpression(TE)); } - return new CClass(D->Name->Text, Types); + return new CClass(D->Name->getCanonicalText(), Types); } case NodeKind::EqualityConstraintExpression: { @@ -636,10 +636,10 @@ namespace bolt { case NodeKind::ReferenceTypeExpression: { auto RefTE = static_cast(N); - auto Ty = lookupMono(RefTE->Name->Text); + auto Ty = lookupMono(RefTE->Name->getCanonicalText()); if (Ty == nullptr) { if (Config.typeVarsRequireForall()) { - DE.add(RefTE->Name->Text, RefTE->Name); + DE.add(RefTE->Name->getCanonicalText(), RefTE->Name); } Ty = createTypeVar(); } @@ -650,13 +650,13 @@ namespace bolt { case NodeKind::VarTypeExpression: { auto VarTE = static_cast(N); - auto Ty = lookupMono(VarTE->Name->Text); + auto Ty = lookupMono(VarTE->Name->getCanonicalText()); if (Ty == nullptr) { if (Config.typeVarsRequireForall()) { - DE.add(VarTE->Name->Text, VarTE->Name); + DE.add(VarTE->Name->getCanonicalText(), VarTE->Name); } - Ty = createRigidVar(VarTE->Name->Text); - addBinding(VarTE->Name->Text, new Forall(Ty)); + Ty = createRigidVar(VarTE->Name->getCanonicalText()); + addBinding(VarTE->Name->getCanonicalText(), new Forall(Ty)); } N->setType(Ty); return Ty; @@ -725,9 +725,9 @@ namespace bolt { ZEN_ASSERT(Ctx->ReturnType != nullptr); return Ctx->ReturnType; } - auto Scm = lookup(Ref->Name->Text); + auto Scm = lookup(Ref->Name->getCanonicalText()); if (Scm == nullptr) { - DE.add(Ref->Name->Text, Ref->Name); + DE.add(Ref->Name->getCanonicalText(), Ref->Name); return createTypeVar(); } auto Ty = instantiate(Scm, X); @@ -791,7 +791,7 @@ namespace bolt { case NodeKind::BindPattern: { - addBinding(static_cast(Pattern)->Name->Text, new Forall(TVs, Constraints, Type)); + addBinding(static_cast(Pattern)->Name->getCanonicalText(), new Forall(TVs, Constraints, Type)); break; } @@ -813,7 +813,7 @@ namespace bolt { for (auto TE: Class->TypeVars) { Tys.push_back(llvm::cast(TE->getType())); } - Out.push_back(TypeclassSignature { Class->Name->Text, Tys }); + Out.push_back(TypeclassSignature { Class->Name->getCanonicalText(), Tys }); } if (Decl->TypeAssert != nullptr) { if (llvm::isa(Decl->TypeAssert->TypeExpression)) { @@ -827,7 +827,7 @@ namespace bolt { ZEN_ASSERT(llvm::isa(TV)); Tys.push_back(static_cast(TV)); } - Out.push_back(TypeclassSignature { TCE->Name->Text, Tys }); + Out.push_back(TypeclassSignature { TCE->Name->getCanonicalText(), Tys }); } } } diff --git a/src/Diagnostics.cc b/src/Diagnostics.cc index c625aeafa..c9a4e17fc 100644 --- a/src/Diagnostics.cc +++ b/src/Diagnostics.cc @@ -47,7 +47,9 @@ namespace bolt { static std::string describe(NodeKind Type) { switch (Type) { case NodeKind::Identifier: - return "an identifier"; + return "an identifier starting with a lowercase letter"; + case NodeKind::IdentifierAlt: + return "an identifier starting with a capital letter"; case NodeKind::CustomOperator: return "an operator"; case NodeKind::IntegerLiteral: diff --git a/src/Parser.cc b/src/Parser.cc index 53d86a5d0..958a41338 100644 --- a/src/Parser.cc +++ b/src/Parser.cc @@ -211,18 +211,24 @@ after_constraints: auto T0 = Tokens.peek(); switch (T0->getKind()) { case NodeKind::Identifier: + case NodeKind::IdentifierAlt: { std::vector> ModulePath; for (;;) { - auto T1 = Tokens.peek(); - if (T1->getKind() != NodeKind::IdentifierAlt) { + auto T1 = Tokens.peek(0); + auto T2 = Tokens.peek(1); + if (!llvm::isa(T1) || !llvm::isa(T2)) { break; } Tokens.get(); auto Dot = expectToken(); ModulePath.push_back(std::make_tuple(static_cast(T1), Dot)); } - return new ReferenceExpression(ModulePath, expectToken()); + auto T3 = Tokens.get(); + if (!llvm::isa(T3)) { + throw UnexpectedTokenDiagnostic(File, T3, { NodeKind::Identifier, NodeKind::IdentifierAlt }); + } + return new ReferenceExpression(ModulePath, static_cast(T3)); } case NodeKind::LParen: {