From 48f1b0f45c687da3642cc3481faa16ebd55b3628 Mon Sep 17 00:00:00 2001 From: Sam Vervaeck Date: Wed, 31 Aug 2022 13:29:56 +0200 Subject: [PATCH] Add some type-checking logic and improve diagnostics --- src/bin/bolt.ts | 29 +- src/checker.ts | 802 +++++++++++++++++++++++++++++++++++++++++++++ src/cst.ts | 128 +++++++- src/diagnostics.ts | 232 ++++++++++++- src/parser.ts | 118 +++---- src/scanner.ts | 11 +- src/util.ts | 4 + 7 files changed, 1248 insertions(+), 76 deletions(-) diff --git a/src/bin/bolt.ts b/src/bin/bolt.ts index 0957c8075..275ac8dc4 100644 --- a/src/bin/bolt.ts +++ b/src/bin/bolt.ts @@ -7,9 +7,11 @@ import path from "path" import fs from "fs" import yargs from "yargs" -import { Diagnostics } from "../diagnostics" +import { Diagnostics, UnexpectedCharDiagnostic, UnexpectedTokenDiagnostic } from "../diagnostics" import { Punctuator, Scanner } from "../scanner" -import { Parser } from "../parser" +import { ParseError, Parser } from "../parser" +import { Checker } from "../checker" +import { TextFile } from "../cst" function debug(value: any) { console.error(util.inspect(value, { colors: true, depth: Infinity })); @@ -31,14 +33,27 @@ yargs const cwd = args.C; const filename = path.resolve(cwd, args.file); + const diagnostics = new Diagnostics(); const text = fs.readFileSync(filename, 'utf8') - const scanner = new Scanner(text, 0, diagnostics); + const file = new TextFile(filename, text); + const scanner = new Scanner(text, 0, diagnostics, file); const punctuated = new Punctuator(scanner); - const parser = new Parser(punctuated); - const sourceFile = parser.parseSourceFile(); - - debug(sourceFile.toJSON()); + const parser = new Parser(file, punctuated); + let sourceFile; + try { + sourceFile = parser.parseSourceFile(); + } catch (error) { + if (!(error instanceof ParseError)) { + throw error; + } + diagnostics.add(new UnexpectedTokenDiagnostic(error.file, error.actual, error.expected)); + return; + } + sourceFile.setParents(); + 
/** Tag identifying which variant of {@link Type} a value is. */
export enum TypeKind {
  Arrow,
  Var,
  Con,
  Any,
  Tuple,
}

/**
 * Applies `sub` to every type in `types`.
 *
 * Always returns a freshly allocated array; `changed` tells the caller
 * whether at least one element differs (by identity) from its original.
 */
function substituteAll(types: readonly Type[], sub: TVSub): { types: Type[]; changed: boolean } {
  let changed = false;
  const result = types.map(type => {
    const newType = type.substitute(sub);
    if (newType !== type) {
      changed = true;
    }
    return newType;
  });
  return { types: result, changed };
}

/** Behaviour shared by every type representation. */
abstract class TypeBase {

  public abstract readonly kind: TypeKind;

  /**
   * Yields the type variables occurring in this type.
   * A variable is yielded once per occurrence, so duplicates are possible.
   */
  public abstract getTypeVars(): Iterable<TVar>;

  /**
   * Replaces the type variables bound in `sub` by the types they are bound to.
   * Implementations return `this` when nothing was replaced.
   */
  public abstract substitute(sub: TVSub): Type;

  /** Whether a type variable with the same id as `tv` occurs in this type. */
  public hasTypeVar(tv: TVar): boolean {
    for (const other of this.getTypeVars()) {
      if (tv.id === other.id) {
        return true;
      }
    }
    return false;
  }

}

/** A type variable, identified by its numeric id. */
class TVar extends TypeBase {

  public readonly kind = TypeKind.Var;

  public constructor(
    public id: number,
  ) {
    super();
  }

  public *getTypeVars(): Iterable<TVar> {
    yield this;
  }

  /** Returns the binding of this variable in `sub` as-is, or `this` when unbound. */
  public substitute(sub: TVSub): Type {
    return sub.get(this) ?? this;
  }

}

/** A function type: a list of parameter types and a return type. */
class TArrow extends TypeBase {

  public readonly kind = TypeKind.Arrow;

  public constructor(
    public paramTypes: Type[],
    public returnType: Type,
  ) {
    super();
  }

  public *getTypeVars(): Iterable<TVar> {
    for (const paramType of this.paramTypes) {
      yield* paramType.getTypeVars();
    }
    yield* this.returnType.getTypeVars();
  }

  public substitute(sub: TVSub): Type {
    const params = substituteAll(this.paramTypes, sub);
    const newReturnType = this.returnType.substitute(sub);
    if (!params.changed && newReturnType === this.returnType) {
      return this;
    }
    return new TArrow(params.types, newReturnType);
  }

}

/** A type constructor applied to zero or more argument types. */
class TCon extends TypeBase {

  public readonly kind = TypeKind.Con;

  public constructor(
    public id: number,
    public argTypes: Type[],
    public displayName: string,
  ) {
    super();
  }

  public *getTypeVars(): Iterable<TVar> {
    for (const argType of this.argTypes) {
      yield* argType.getTypeVars();
    }
  }

  public substitute(sub: TVSub): Type {
    const args = substituteAll(this.argTypes, sub);
    return args.changed ? new TCon(this.id, args.types, this.displayName) : this;
  }

}

/** A type without type variables that substitution leaves untouched. */
class TAny extends TypeBase {

  public readonly kind = TypeKind.Any;

  public *getTypeVars(): Iterable<TVar> {
    // Intentionally empty: this type contains no type variables.
  }

  public substitute(sub: TVSub): Type {
    return this;
  }

}

/** A tuple type made of an ordered list of element types. */
class TTuple extends TypeBase {

  public readonly kind = TypeKind.Tuple;

  public constructor(
    public elementTypes: Type[],
  ) {
    super();
  }

  public *getTypeVars(): Iterable<TVar> {
    for (const elementType of this.elementTypes) {
      yield* elementType.getTypeVars();
    }
  }

  public substitute(sub: TVSub): Type {
    const elements = substituteAll(this.elementTypes, sub);
    return elements.changed ? new TTuple(elements.types) : this;
  }

}

export type Type
  = TCon
  | TArrow
  | TVar
  | TAny
  | TTuple

/** A set of type variables keyed by id; iterating yields the stored variables. */
class TVSet {

  private mapping = new Map<number, TVar>();

  public add(tv: TVar): void {
    this.mapping.set(tv.id, tv);
  }

  public delete(tv: TVar): void {
    this.mapping.delete(tv.id);
  }

  public [Symbol.iterator](): Iterator<TVar> {
    return this.mapping.values();
  }

}

/** A substitution: a mapping from type variables (by id) to types. */
class TVSub {

  private mapping = new Map<number, Type>();

  public set(tv: TVar, type: Type): void {
    this.mapping.set(tv.id, type);
  }

  public get(tv: TVar): Type | undefined {
    return this.mapping.get(tv.id);
  }

  public has(tv: TVar): boolean {
    return this.mapping.has(tv.id);
  }

  public delete(tv: TVar): void {
    this.mapping.delete(tv.id);
  }

  public values(): Iterable<Type> {
    return this.mapping.values();
  }

}
Forall + +class TypeEnv extends Map { +} + +export interface InferContext { + typeVars: TVSet; + env: TypeEnv; + constraints: ConstraintSet; +} + +export class Checker { + + private nextTypeVarId = 0; + private nextConTypeId = 0; + + private stringType = new TCon(this.nextConTypeId++, [], 'String'); + private intType = new TCon(this.nextConTypeId++, [], 'Int'); + private boolType = new TCon(this.nextConTypeId++, [], 'Bool'); + + private typeEnvs: TypeEnv[] = []; + private typeVars: TVSet[] = []; + private constraints: ConstraintSet[] = []; + private returnTypes: Type[] = []; + + public constructor( + private diagnostics: Diagnostics + ) { + + } + + public getIntType(): Type { + return this.intType; + } + + public getStringType(): Type { + return this.stringType; + } + + public getBoolType(): Type { + return this.boolType; + } + + private createTypeVar(): TVar { + const typeVar = new TVar(this.nextTypeVarId++); + this.typeVars[this.typeVars.length-1].add(typeVar); + return typeVar; + } + + private addConstraint(constraint: Constraint): void { + this.constraints[this.constraints.length-1].push(constraint); + } + + private pushContext(context: InferContext) { + if (context.typeVars !== null) { + this.typeVars.push(context.typeVars); + } + if (context.env !== null) { + this.typeEnvs.push(context.env); + } + if (context.constraints !== null) { + this.constraints.push(context.constraints); + } + } + + private popContext(context: InferContext) { + if (context.typeVars !== null) { + this.typeVars.pop(); + } + if (context.env !== null) { + this.typeEnvs.pop(); + } + if (context.constraints !== null) { + this.constraints.pop(); + } + } + + private lookup(name: string): Scheme | null { + for (let i = this.typeEnvs.length-1; i >= 0; i--) { + const scheme = this.typeEnvs[i].get(name); + if (scheme !== undefined) { + return scheme; + } + } + return null; + } + + private getReturnType(): Type { + assert(this.returnTypes.length > 0); + return 
/**
 * Generates type constraints for a source file, statement or declaration.
 *
 * Let-declarations must already have been visited by forwardDeclare(),
 * which stores the `type` and `context` read back here.
 *
 * @param node The node to generate constraints for.
 * @throws Error when `node` is of a kind this method does not handle.
 */
public infer(node: Syntax): void {

  switch (node.kind) {

    case SyntaxKind.SourceFile:
    {
      for (const element of node.elements) {
        this.infer(element);
      }
      break;
    }

    case SyntaxKind.ExpressionStatement:
    {
      this.inferExpression(node.expression);
      break;
    }

    case SyntaxKind.ReturnStatement:
    {
      // A bare `return` yields the empty tuple.
      const type: Type = node.expression === null
        ? new TTuple([])
        : this.inferExpression(node.expression);
      this.addConstraint(
        new CEqual(
          this.getReturnType(),
          type,
          node
        )
      );
      break;
    }

    case SyntaxKind.LetDeclaration:
    {
      // Get the type and context that were stored on the node by forwardDeclare()
      const type = node.type!;
      const context = node.context!;

      this.pushContext(context);

      const paramTypes: Type[] = [];
      const returnType = this.createTypeVar();
      for (const param of node.params) {
        const paramType = this.createTypeVar();
        this.inferBindings(param.pattern, paramType, [], []);
        paramTypes.push(paramType);
      }

      if (node.body !== null) {
        // Expose this declaration's return type to the ReturnStatements in
        // its body. Without this push getReturnType() finds an empty stack
        // and its assertion fails on the first `return`.
        this.returnTypes.push(returnType);
        try {
          switch (node.body.kind) {
            case SyntaxKind.ExprBody:
            {
              this.addConstraint(
                new CEqual(
                  this.inferExpression(node.body.expression),
                  returnType,
                  node.body.expression
                )
              );
              break;
            }
            case SyntaxKind.BlockBody:
            {
              for (const element of node.body.elements) {
                this.infer(element);
              }
              break;
            }
          }
        } finally {
          this.returnTypes.pop();
        }
      }

      this.addConstraint(new CEqual(type, new TArrow(paramTypes, returnType), node));

      this.popContext(context);

      // The binding is added after popContext() so that it lands in the
      // enclosing environment. inferBindings() takes an array of type
      // variables, so the set collected for this declaration is copied.
      this.inferBindings(node.pattern, type, [...context.typeVars], context.constraints);

      break;
    }

    default:
      // Interpolating the node itself would only print "[object Object]".
      throw new Error(`Unexpected ${node.constructor.name}`);

  }

}
} + + case SyntaxKind.ConstantExpression: + { + let ty; + switch (node.token.kind) { + case SyntaxKind.StringLiteral: + ty = this.getStringType(); + break; + case SyntaxKind.Integer: + ty = this.getIntType(); + break; + } + return ty; + } + + case SyntaxKind.NamedTupleExpression: + { + const scheme = this.lookup(node.name.text); + if (scheme === null) { + this.diagnostics.add(new BindingNotFoudDiagnostic(node.name.text, node.name)); + return new TAny(); + } + const type = this.instantiate(scheme); + assert(type.kind === TypeKind.Con); + const argTypes = []; + for (const element of node.elements) { + argTypes.push(this.inferExpression(element)); + } + return new TCon(type.id, argTypes, type.displayName); + } + + case SyntaxKind.InfixExpression: + { + const scheme = this.lookup(node.operator.text); + if (scheme === null) { + this.diagnostics.add(new BindingNotFoudDiagnostic(node.operator.text, node.operator)); + return new TAny(); + } + const opType = this.instantiate(scheme); + const retType = this.createTypeVar(); + const leftType = this.inferExpression(node.left); + const rightType = this.inferExpression(node.right); + this.addConstraint( + new CEqual( + new TArrow([ leftType, rightType ], retType), + opType, + node, + ), + ); + return retType; + } + + default: + throw new Error(`Unexpected ${node}`); + + } + + } + + public inferTypeExpression(node: TypeExpression): Type { + + switch (node.kind) { + + case SyntaxKind.ReferenceTypeExpression: + { + const scheme = this.lookup(node.name.text); + if (scheme === null) { + this.diagnostics.add(new BindingNotFoudDiagnostic(node.name.text, node.name)); + return new TAny(); + } + return this.instantiate(scheme); + } + + default: + throw new Error(`Unrecognised ${node}`); + + } + + } + + public inferBindings(pattern: Pattern, type: Type, tvs: TVar[], constraints: Constraint[]): void { + + switch (pattern.kind) { + + case SyntaxKind.BindPattern: + { + this.addBinding(pattern.name.text, new Forall(tvs, constraints, type)); + 
/**
 * Tries to make `left` and `right` equal, recording the type variable
 * bindings this requires in `solution`.
 *
 * No diagnostic is emitted here; the caller decides how to report a failure.
 *
 * @param left One side of the equation.
 * @param right The other side of the equation.
 * @param solution Bindings found so far; extended in place.
 * @returns `true` when the two types could be unified, `false` otherwise.
 */
private unify(left: Type, right: Type, solution: TVSub): boolean {

  // Follow variable-to-variable bindings to the end so that a variable that
  // is already bound is never bound a second time.
  const resolve = (type: Type): Type => {
    while (type.kind === TypeKind.Var) {
      const bound = solution.get(type);
      if (bound === undefined) {
        break;
      }
      type = bound;
    }
    return type;
  };

  left = resolve(left);
  right = resolve(right);

  if (left.kind === TypeKind.Var) {
    // A variable trivially equals itself; binding it to itself would create
    // a cyclic substitution.
    if (right.kind === TypeKind.Var && right.id === left.id) {
      return true;
    }
    // Occurs check: binding a variable to a type containing that same
    // variable would describe an infinite type.
    if (right.hasTypeVar(left)) {
      return false;
    }
    solution.set(left, right);
    return true;
  }

  if (right.kind === TypeKind.Var) {
    return this.unify(right, left, solution);
  }

  // TAny is handed out after an error has already been reported (e.g. a
  // binding that was not found); accepting it here avoids follow-up errors.
  if (left.kind === TypeKind.Any || right.kind === TypeKind.Any) {
    return true;
  }

  if (left.kind === TypeKind.Arrow && right.kind === TypeKind.Arrow) {
    if (left.paramTypes.length !== right.paramTypes.length) {
      return false;
    }
    // Keep going after a failure so every parameter still gets a chance to
    // bind its type variables.
    let success = true;
    const count = left.paramTypes.length;
    for (let i = 0; i < count; i++) {
      if (!this.unify(left.paramTypes[i], right.paramTypes[i], solution)) {
        success = false;
      }
    }
    if (!this.unify(left.returnType, right.returnType, solution)) {
      success = false;
    }
    return success;
  }

  if (left.kind === TypeKind.Con && right.kind === TypeKind.Con) {
    if (left.id !== right.id) {
      return false;
    }
    assert(left.argTypes.length === right.argTypes.length);
    const count = left.argTypes.length;
    for (let i = 0; i < count; i++) {
      if (!this.unify(left.argTypes[i], right.argTypes[i], solution)) {
        return false;
      }
    }
    return true;
  }

  if (left.kind === TypeKind.Tuple && right.kind === TypeKind.Tuple) {
    if (left.elementTypes.length !== right.elementTypes.length) {
      return false;
    }
    const count = left.elementTypes.length;
    for (let i = 0; i < count; i++) {
      if (!this.unify(left.elementTypes[i], right.elementTypes[i], solution)) {
        return false;
      }
    }
    return true;
  }

  return false;
}
readonly kind: SyntaxKind; public abstract getFirstToken(): Token; public abstract getLastToken(): Token; + public getRange(): TextRange { + return new TextRange( + this.getFirstToken().getStartPosition(), + this.getLastToken().getEndPosition(), + ); + } + + public getSourceFile(): SourceFile { + let curr = this as any; + do { + if (curr.kind === SyntaxKind.SourceFile) { + return curr; + } + curr = curr.parent; + } while (curr != null); + throw new Error(`Could not find a SourceFile in any of the parent nodes of ${this}`); + } + + public setParents(): void { + + const visit = (value: any) => { + if (value === null) { + return; + } + if (Array.isArray(value)) { + value.forEach(visit); + return; + } + if (value instanceof SyntaxBase) { + value.parent = this as any; + value.setParents(); + return; + } + } + + for (const key of Object.getOwnPropertyNames(this)) { + if (isIgnoredProperty(key)) { + continue; + } + visit((this as any)[key]); + } + + } + public toJSON(): JSONObject { const obj: JSONObject = {}; @@ -179,7 +236,7 @@ abstract class SyntaxBase { obj['type'] = this.constructor.name; for (const key of Object.getOwnPropertyNames(this)) { - if (key === 'kind') { + if (isIgnoredProperty(key)) { continue; } obj[key] = encode((this as any)[key]); @@ -207,7 +264,7 @@ abstract class TokenBase extends SyntaxBase { private endPos: TextPosition | null = null; - constructor( + public constructor( private startPos: TextPosition, ) { super(); @@ -221,6 +278,13 @@ abstract class TokenBase extends SyntaxBase { throw new Error(`Trying to get the last token of an object that is a token itself.`); } + public getRange(): TextRange { + return new TextRange( + this.getStartPosition(), + this.getEndPosition(), + ); + } + public getStartPosition(): TextPosition { return this.startPos; } @@ -915,6 +979,54 @@ export class QualifiedName extends SyntaxBase { } +export class CallExpression extends SyntaxBase { + + public readonly kind = SyntaxKind.CallExpression; + + public constructor( + 
public func: Expression, + public args: Expression[], + ) { + super(); + } + + public getFirstToken(): Token { + return this.func.getFirstToken(); + } + + public getLastToken(): Token { + if (this.args.length > 0) { + return this.args[this.args.length-1].getLastToken(); + } + return this.func.getLastToken(); + } + +} + +export class NamedTupleExpression extends SyntaxBase { + + public readonly kind = SyntaxKind.NamedTupleExpression; + + public constructor( + public name: Constructor, + public elements: Expression[], + ) { + super(); + } + + public getFirstToken(): Token { + return this.name; + } + + public getLastToken(): Token { + if (this.elements.length > 0) { + return this.elements[this.elements.length-1].getLastToken(); + } + return this.name; + } + +} + export class ReferenceExpression extends SyntaxBase { public readonly kind = SyntaxKind.ReferenceExpression; @@ -1000,7 +1112,9 @@ export class InfixExpression extends SyntaxBase { } export type Expression - = ReferenceExpression + = CallExpression + | NamedTupleExpression + | ReferenceExpression | ConstantExpression | TupleExpression | NestedExpression @@ -1200,6 +1314,9 @@ export class LetDeclaration extends SyntaxBase { public readonly kind = SyntaxKind.LetDeclaration; + public type?: Type; + public context?: InferContext; + public constructor( public pubKeyword: PubKeyword | null, public letKeyword: LetKeyword, @@ -1317,6 +1434,7 @@ export class SourceFile extends SyntaxBase { public readonly kind = SyntaxKind.SourceFile; public constructor( + private file: TextFile, public elements: SourceFileElement[], public eof: EndOfFile, ) { @@ -1337,4 +1455,8 @@ export class SourceFile extends SyntaxBase { return this.eof; } + public getFile() { + return this.file; + } + } diff --git a/src/diagnostics.ts b/src/diagnostics.ts index 03e71ced3..3ed721a24 100644 --- a/src/diagnostics.ts +++ b/src/diagnostics.ts @@ -1,23 +1,199 @@ +import { TypeKind, Type } from "./checker"; +import { Syntax, SyntaxKind, TextFile, 
// ANSI SGR (Select Graphic Rendition) escape sequences used to style
// diagnostics printed to a terminal.

const ANSI_RESET = "\u001b[0m"
const ANSI_BOLD = "\u001b[1m"
const ANSI_UNDERLINE = "\u001b[4m"
const ANSI_REVERSED = "\u001b[7m"

// Foreground colours: SGR 30-37. Note that 35 is magenta and 36 is cyan.
const ANSI_FG_BLACK = "\u001b[30m"
const ANSI_FG_RED = "\u001b[31m"
const ANSI_FG_GREEN = "\u001b[32m"
const ANSI_FG_YELLOW = "\u001b[33m"
const ANSI_FG_BLUE = "\u001b[34m"
const ANSI_FG_MAGENTA = "\u001b[35m"
const ANSI_FG_CYAN = "\u001b[36m"
const ANSI_FG_WHITE = "\u001b[37m"

// Background colours: SGR 40-47, in the same order as the foreground ones.
const ANSI_BG_BLACK = "\u001b[40m"
const ANSI_BG_RED = "\u001b[41m"
const ANSI_BG_GREEN = "\u001b[42m"
const ANSI_BG_YELLOW = "\u001b[43m"
const ANSI_BG_BLUE = "\u001b[44m"
const ANSI_BG_MAGENTA = "\u001b[45m"
const ANSI_BG_CYAN = "\u001b[46m"
const ANSI_BG_WHITE = "\u001b[47m"
/**
 * Renders a type as human-readable text for use in diagnostic messages.
 *
 * - `Any` is printed as `Any`.
 * - A constructor type is printed as its display name followed by its
 *   space-separated arguments.
 * - A type variable is printed as `a` followed by its id.
 * - A function type is printed as `(p1, p2) -> r`.
 * - A tuple type is printed as `(e1, e2)`.
 *
 * @param type The type to describe.
 * @returns The textual description of `type`.
 */
function describeType(type: Type): string {
  switch (type.kind) {
    case TypeKind.Any:
      return 'Any';
    case TypeKind.Con:
    {
      let out = type.displayName;
      for (const argType of type.argTypes) {
        out += ' ' + describeType(argType);
      }
      return out;
    }
    case TypeKind.Var:
      return 'a' + type.id;
    case TypeKind.Arrow:
    {
      const params = type.paramTypes.map(paramType => describeType(paramType));
      return '(' + params.join(', ') + ') -> ' + describeType(type.returnType);
    }
    case TypeKind.Tuple:
    {
      const elements = type.elementTypes.map(elementType => describeType(elementType));
      // The closing parenthesis was previously never written.
      return '(' + elements.join(', ') + ')';
    }
  }
}
span.end.line : startLine) + extraLineCount) + const gutterWidth = Math.max(2, countDigits(endLine+1)) + for (let i = startLine; i < endLine; i++) { + const line = lines[i]; + let j = firstIndexOfNonEmpty(line); + out += indentation + ' ' + ANSI_FG_BLACK + ANSI_BG_WHITE + ' '.repeat(gutterWidth-countDigits(i+1))+(i+1).toString() + ANSI_RESET + ' ' + line + '\n' + const gutter = indentation + ' ' + ANSI_FG_BLACK + ANSI_BG_WHITE + ' '.repeat(gutterWidth) + ANSI_RESET + ' ' + let mark: number; + let skip: number; + if (i === span.start.line-1 && i === span.end.line-1) { + skip = span.start.column-1; + mark = span.end.column-span.start.column; + } else if (i === span.start.line-1) { + skip = span.start.column-1; + mark = line.length-span.start.column+1; + } else if (i === span.end.line-1) { + skip = 0; + mark = span.end.column-1; + } else if (i > span.start.line-1 && i < span.end.line-1) { + skip = 0; + mark = line.length; + } else { + continue; + } + if (j <= skip) { + j = 0; + } + out += gutter + ' '.repeat(j+skip) + ANSI_FG_RED + '~'.repeat(mark-j) + ANSI_RESET + '\n' + } + return out; +} + +function firstIndexOfNonEmpty(str: string) { + let j = 0; + for (; j < str.length; j++) { + const ch = str[j]; + if (ch !== ' ' && ch !== '\t') { + break; + } + } + return j +} + diff --git a/src/parser.ts b/src/parser.ts index e16543792..a4a092b27 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,5 +1,4 @@ -import { privateDecrypt } from "crypto"; import { ReferenceTypeExpression, SourceFile, @@ -35,70 +34,24 @@ import { FieldStructPatternElement, TuplePattern, InfixExpression, + TextFile, + CallExpression, + NamedTupleExpression, } from "./cst" -import { Stream, MultiDict } from "./util"; +import { Stream } from "./util"; -const DESCRIPTIONS: Record = { - [SyntaxKind.StringLiteral]: 'a string literal', - [SyntaxKind.Identifier]: "an identifier", - [SyntaxKind.Comma]: "','", - [SyntaxKind.Colon]: "':'", - [SyntaxKind.Integer]: "an integer", - [SyntaxKind.LParen]: "'('", - 
[SyntaxKind.RParen]: "')'", - [SyntaxKind.LBrace]: "'{'", - [SyntaxKind.RBrace]: "'}'", - [SyntaxKind.LBracket]: "'['", - [SyntaxKind.RBracket]: "']'", - [SyntaxKind.ConstantExpression]: 'a constant expression', - [SyntaxKind.ReferenceExpression]: 'a reference expression', - [SyntaxKind.LineFoldEnd]: 'the end of the current line-fold', - [SyntaxKind.TupleExpression]: 'a tuple expression such as (1, 2)', - [SyntaxKind.ReferenceExpression]: 'a reference to some variable', - [SyntaxKind.NestedExpression]: 'an expression nested with parentheses', - [SyntaxKind.ConstantExpression]: 'a constant expression such as 1 or "foo"', -} - -function describeSyntaxKind(kind: SyntaxKind): string { - const desc = DESCRIPTIONS[kind]; - if (desc === undefined) { - throw new Error(`Could not describe SyntaxKind '${kind}'`); - } - return desc -} - -function describeExpected(expected: SyntaxKind[]) { - if (expected.length === 0) { - return 'nothing'; - } - let out = describeSyntaxKind(expected[0]); - if (expected.length === 1) { - return out; - } - for (let i = 1; i < expected.length-1; i++) { - const kind = expected[i]; - out += ', ' + describeSyntaxKind(kind); - } - out += ' or ' + describeSyntaxKind(expected[expected.length-1]) - return out; -} - -class ParseError extends Error { +export class ParseError extends Error { public constructor( + public file: TextFile, public actual: Token, public expected: SyntaxKind[], ) { - super(`got '${actual.text}' but expected ${describeExpected(expected)}`); + super(`Uncaught parse error`); } } -function isConstructor(token: Token): boolean { - return token.kind === SyntaxKind.Identifier - && token.text[0].toUpperCase() === token.text[0]; -} - function isBinaryOperatorLike(token: Token): boolean { return token.kind === SyntaxKind.CustomOperator; } @@ -148,6 +101,7 @@ export class Parser { private suffixExprOperators = new Set(); public constructor( + public file: TextFile, public tokens: Stream, ) { for (const [name, mode, precedence] of 
EXPR_OPERATOR_TABLE) { @@ -178,7 +132,7 @@ export class Parser { } private raiseParseError(actual: Token, expected: SyntaxKind[]): never { - throw new ParseError(actual, expected); + throw new ParseError(this.file, actual, expected); } private peekTokenAfterModifiers(): Token { @@ -203,7 +157,7 @@ export class Parser { case SyntaxKind.Identifier: return this.parseReferenceTypeExpression(); default: - throw new ParseError(t0, [ SyntaxKind.Identifier ]); + this.raiseParseError(t0, [ SyntaxKind.Identifier ]); } } @@ -237,10 +191,10 @@ export class Parser { private parseExpressionWithParens(): Expression { const lparen = this.expectToken(SyntaxKind.LParen) const t1 = this.peekToken(); + // FIXME should be able to parse tuples if (t1.kind === SyntaxKind.RParen) { this.getToken(); return new TupleExpression(lparen, [], t1); - } else if (t1.kind === SyntaxKind.Constructor) { } else { const expression = this.parseExpression(); const t2 = this.expectToken(SyntaxKind.RParen); @@ -248,18 +202,45 @@ export class Parser { } } - private parseExpressionNoOperators(): Expression { + private parsePrimitiveExpression(): Expression { const t0 = this.peekToken(); switch (t0.kind) { case SyntaxKind.LParen: return this.parseExpressionWithParens(); case SyntaxKind.Identifier: return this.parseReferenceExpression(); + case SyntaxKind.Constructor: + { + this.getToken(); + const t1 = this.peekToken(); + if (t1.kind === SyntaxKind.LBrace) { + this.getToken(); + const fields = []; + let rparen; + for (;;) { + + } + return new StructExpression(t0, t1, fields, rparen); + } + const elements = []; + for (;;) { + const t2 = this.peekToken(); + if (t2.kind === SyntaxKind.LineFoldEnd + || t2.kind === SyntaxKind.RParen + || isBinaryOperatorLike(t2) + || isPrefixOperatorLike(t2)) { + break; + } + elements.push(this.parseExpression()); + } + return new NamedTupleExpression(t0, elements); + } case SyntaxKind.Integer: case SyntaxKind.StringLiteral: return this.parseConstantExpression(); default: 
this.raiseParseError(t0, [ + SyntaxKind.NamedTupleExpression, SyntaxKind.TupleExpression, SyntaxKind.NestedExpression, SyntaxKind.ConstantExpression, @@ -268,6 +249,25 @@ export class Parser { } } + private parseExpressionNoOperators(): Expression { + const func = this.parsePrimitiveExpression(); + const args = []; + for (;;) { + const t1 = this.peekToken(); + if (t1.kind === SyntaxKind.LineFoldEnd + || t1.kind === SyntaxKind.RParen + || isBinaryOperatorLike(t1) + || isPrefixOperatorLike(t1)) { + break; + } + args.push(this.parsePrimitiveExpression()); + } + if (args.length === 0) { + return func + } + return new CallExpression(func, args); + } + private parseUnaryExpression(): Expression { let result = this.parseExpressionNoOperators() const prefixes = []; @@ -562,7 +562,7 @@ export class Parser { const element = this.parseSourceFileElement(); elements.push(element); } - return new SourceFile(elements, eof); + return new SourceFile(this.file, elements, eof); } } diff --git a/src/scanner.ts b/src/scanner.ts index 04f3a0202..6e020d41f 100644 --- a/src/scanner.ts +++ b/src/scanner.ts @@ -27,6 +27,7 @@ import { CustomOperator, Constructor, Integer, + TextFile, } from "./cst" import { Diagnostics, UnexpectedCharDiagnostic } from "./diagnostics" import { Stream, BufferedStream, assert } from "./util"; @@ -74,6 +75,7 @@ export class Scanner extends BufferedStream { public text: string, public textOffset: number = 0, public diagnostics: Diagnostics, + private file: TextFile, ) { super(); } @@ -157,8 +159,9 @@ export class Scanner extends BufferedStream { let contents = ''; let escaping = false; for (;;) { - const c1 = this.getChar(); if (escaping) { + const startPos = this.getCurrentPosition(); + const c1 = this.getChar(); switch (c1) { case 'a': contents += '\a'; break; case 'b': contents += '\b'; break; @@ -171,11 +174,12 @@ export class Scanner extends BufferedStream { case '\'': contents += '\''; break; case '\"': contents += '\"'; break; default: - 
this.diagnostics.add(new UnexpectedCharDiagnostic(this.text, this.textOffset, c1)); + this.diagnostics.add(new UnexpectedCharDiagnostic(this.file, startPos, c1)); throw new ScanError(); } escaping = false; } else { + const c1 = this.getChar(); if (c1 === '"') { break; } else { @@ -199,6 +203,7 @@ export class Scanner extends BufferedStream { case '}': return new RBrace(startPos); case ',': return new Comma(startPos); case ':': return new Colon(startPos); + case '.': return new Dot(startPos); case '+': case '-': @@ -331,7 +336,7 @@ export class Scanner extends BufferedStream { default: // Nothing matched, so the current character is unrecognisable - this.diagnostics.add(new UnexpectedCharDiagnostic(this.text, this.textOffset, c0)); + this.diagnostics.add(new UnexpectedCharDiagnostic(this.file, startPos, c0)); throw new ScanError(); } diff --git a/src/util.ts b/src/util.ts index 304d9e2a5..5ca9c1a4b 100644 --- a/src/util.ts +++ b/src/util.ts @@ -5,6 +5,10 @@ export function assert(test: boolean): asserts test { } } +export function countDigits(x: number, base: number = 10) { + return x === 0 ? 1 : Math.ceil(Math.log(x+1) / Math.log(base)) +} + export type JSONValue = null | boolean | number | string | JSONArray | JSONObject export type JSONArray = Array; export type JSONObject = { [key: string]: JSONValue };