From bcec19ecc89ff39de46274116119c408ea21936e Mon Sep 17 00:00:00 2001
From: Sam Vervaeck
Date: Mon, 24 Feb 2020 18:30:39 +0100
Subject: [PATCH] Add a simple scanner for identifiers

---
 src/ast.ts                      | 358 ++++++++++++++++++++++++++++++++
 src/bin/bolt.ts                 | 117 +++++++++++
 src/index.ts                    |   3 +
 src/scanner.ts                  | 251 ++++++++++++++++++++++
 test/000-alpha-identifiers.bolt |   5 +
 5 files changed, 734 insertions(+)
 create mode 100644 src/ast.ts
 create mode 100644 src/bin/bolt.ts
 create mode 100644 src/index.ts
 create mode 100644 src/scanner.ts
 create mode 100644 test/000-alpha-identifiers.bolt

diff --git a/src/ast.ts b/src/ast.ts
new file mode 100644
index 000000000..167c739b3
--- /dev/null
+++ b/src/ast.ts
@@ -0,0 +1,358 @@
+import "reflect-metadata"
+
+// JSON-compatible values, as returned by the toJSON() methods below.
+interface JsonArray extends Array<Json> { };
+interface JsonObject { [key: string]: Json }
+type Json = string | boolean | number | JsonArray | JsonObject;
+
+/** Discriminator stored in the `kind` field of tokens and syntax nodes. */
+export enum SyntaxKind {
+  // Tokens
+
+  Literal,
+  Identifier,
+  Operator,
+  Punctuated,
+
+  SourceFile,
+
+  QualName,
+
+  // Expressions
+
+  ConstantExpr,
+  ReferenceExpr,
+
+  // Type declarations
+
+  TypeReference,
+
+  // Declaration nodes
+
+  VariableDecl,
+  FunctionDecl,
+}
+
+/** Bit flags combined in the `META` tables to describe each field of a node. */
+enum EdgeType {
+  Primitive = 1,
+  Node = 2,
+  Nullable = 4,
+  List = 8,
+}
+
+/** A source file, identified by its path. */
+export class TextFile {
+
+  constructor(public path: string) {
+  }
+
+}
+
+/** A mutable offset/line/column triple; `clone()` returns an independent copy. */
+export class TextPos {
+
+  constructor(
+    public offset: number,
+    public line: number,
+    public column: number
+  ) {
+  }
+
+  clone() {
+    return new TextPos(this.offset, this.line, this.column)
+  }
+
+  toJSON(): Json {
+    return {
+      offset: this.offset,
+      line: this.line,
+      column: this.column
+    }
+  }
+
+}
+
+/** A start/end pair of positions inside one file; `clone()` copies both positions. */
+export class TextSpan {
+
+  constructor(
+    public file: TextFile,
+    public start: TextPos,
+    public end: TextPos
+  ) {
+  }
+
+  clone() {
+    return new TextSpan(this.file, this.start.clone(), this.end.clone());
+  }
+
+  toJSON(): Json {
+    return {
+      file: this.file.path,
+      start: this.start.toJSON(),
+      end: this.end.toJSON(),
+    }
+  }
+
+}
+
+/** A literal token holding a string or bigint value. */
+export class Literal {
+
+  kind = SyntaxKind.Literal;
+
+  static META = {
+    value: EdgeType.Primitive,
+  }
+
+  constructor(
+    public value: string | bigint,
+    public span: TextSpan
+  ) {
+  }
+
+  toJSON(): Json {
+    return {
+      kind: 'Literal',
+      value: typeof this.value === 'bigint' ? Number(this.value) : this.value, // JSON has no bigint; Number() is lossy above 2^53
+      span: this.span.toJSON(),
+    }
+  }
+}
+
+/** The delimiter families a Punctuated token can use. */
+export enum PunctType {
+  Paren,
+  Bracket,
+  Brace,
+}
+
+/** A delimited group of tokens: the delimiter family plus the tokens it contains. */
+export class Punctuated {
+  kind = SyntaxKind.Punctuated
+
+  static META = {
+    punctuator: EdgeType.Primitive,
+    elements: EdgeType.Node | EdgeType.List
+  }
+
+  constructor(
+    public punctuator: PunctType,
+    public elements: Token[],
+    public span: TextSpan
+  ) {
+  }
+
+  toJSON(): Json {
+    return {
+      kind: 'Punctuated',
+      punctuator: this.punctuator,
+      elements: this.elements.map(element => element.toJSON()),
+      span: this.span.toJSON(),
+    }
+  }
+}
+
+/** An identifier token with its source text. */
+export class Identifier {
+
+  kind = SyntaxKind.Identifier;
+
+  static META = {
+    text: EdgeType.Primitive
+  }
+
+  constructor(
+    public text: string,
+    public span: TextSpan
+  ) {
+  }
+
+  toJSON(): Json {
+    return {
+      kind: 'Identifier',
+      text: this.text,
+      span: this.span.toJSON(),
+    }
+  }
+
+}
+
+/** An operator token with its source text. */
+export class Operator {
+
+  kind = SyntaxKind.Operator;
+
+  static META = {
+    text: EdgeType.Primitive
+  }
+
+  constructor(
+    public text: string,
+    public span: TextSpan,
+    public parentNode: Syntax | null = null
+  ) {
+  }
+
+  toJSON(): Json {
+    return {
+      kind: 'Operator',
+      text: this.text,
+      span: this.span.toJSON(),
+    }
+  }
+}
+
+/** Any of the token classes declared above. */
+export type Token
+  = Identifier
+  | Operator
+  | Literal
+  | Punctuated
+
+/** A name together with the identifier path that qualifies it. */
+export class QualName {
+
+  kind = SyntaxKind.QualName;
+
+  static META = {
+    name: EdgeType.Primitive, // `name` is a plain string, not a child node
+    path: EdgeType.Node | EdgeType.List,
+  }
+
+  constructor(
+    public name: string,
+    public path: Identifier[],
+    public span: TextSpan,
+    public parentNode: Syntax | null = null
+  ) {
+  }
+
+}
+
+/** An expression node holding a constant string or bigint value. */
+export class ConstantExpr {
+
+  kind = SyntaxKind.ConstantExpr;
+
+  static META = {
+    value: EdgeType.Primitive,
+  }
+
+  constructor(
+    public value: string | bigint,
+    public span: TextSpan,
+    public parentNode: Syntax | null = null
+  ) {
+  }
+}
+
+/** Any expression node; ConstantExpr is the only one declared so far. */
+export type Expr
+  = ConstantExpr
+
+/** A reference to a named type together with its type arguments. */
+export class TypeReference {
+
+  kind = SyntaxKind.TypeReference;
+
+  static META = {
+    name: EdgeType.Node,
+    args: EdgeType.Node | EdgeType.List,
+  }
+
+  constructor(
+    public name: QualName,
+    public args: TypeDecl[],
+    public span: TextSpan,
+    public parentNode: Syntax | null = null
+  ) {
+  }
+}
+
+/** Any type-declaration node; TypeReference is the only one declared so far. */
+export type TypeDecl
+  = TypeReference
+
+/** A declaration kept as raw tokens; it has no `kind` field (SyntaxKind has no member for it). */
+export class Unexpanded {
+
+  static META = {
+    tokens: EdgeType.Node | EdgeType.List
+  }
+
+  constructor(
+    public tokens: Token[],
+    public span: TextSpan,
+    public parentNode: Syntax | null = null
+  ) {
+  }
+
+}
+
+/** A function declaration; `returnType` and `body` may be null. */
+export class FunctionDecl {
+
+  kind = SyntaxKind.FunctionDecl;
+
+  static META = {
+    name: EdgeType.Node,
+    params: EdgeType.Node | EdgeType.List,
+    returnType: EdgeType.Node | EdgeType.Nullable,
+    body: EdgeType.Node | EdgeType.List | EdgeType.Nullable,
+  }
+
+  constructor(
+    public name: QualName,
+    public params: Param[], // NOTE(review): `Param` is not declared in this file
+    public returnType: TypeDecl | null,
+    public body: Statement[] | null, // NOTE(review): `Statement` is not declared in this file
+    public span: TextSpan,
+    public parentNode: Syntax | null = null
+  ) {
+  }
+
+}
+
+/** A variable declaration; `typeDecl` and `value` may be null. */
+export class VariableDecl {
+
+  kind = SyntaxKind.VariableDecl;
+
+  static META = {
+    bindings: EdgeType.Node,
+    typeDecl: EdgeType.Node | EdgeType.Nullable,
+    value: EdgeType.Node | EdgeType.Nullable,
+  }
+
+  constructor(
+    public bindings: Pattern, // NOTE(review): `Pattern` is not declared in this file
+    public typeDecl: TypeDecl | null,
+    public value: Expr | null,
+    public span: TextSpan
+  ) {
+  }
+}
+
+/** Any declaration that can appear as an element of a SourceFile. */
+export type Decl
+  = Unexpanded
+  | FunctionDecl
+  | VariableDecl
+
+/** Any node type that can be stored in a `parentNode` field. */
+export type Syntax
+  = Decl
+  | Expr
+  | SourceFile
+  | QualName
+
+/** The root node: the declarations of one file plus the span they cover. */
+export class SourceFile {
+
+  kind = SyntaxKind.SourceFile;
+
+  constructor(public elements: Decl[], public span: TextSpan) {
+  }
+}
diff --git a/src/bin/bolt.ts b/src/bin/bolt.ts
new file mode 100644
index 000000000..db28ed50a
--- /dev/null
+++ b/src/bin/bolt.ts
@@ -0,0 +1,117 @@
+#!/usr/bin/env node
+
+import * as fs from "fs"
+
+import yargs from "yargs"
+
+import { Scanner } from "../scanner"
+import { Token, TextFile } from "../ast"
+
+/** Wraps a single value in an array; arrays pass through and null/undefined become []. */
+function toArray<T>(value: T | T[] | null | undefined): T[] {
+  if (Array.isArray(value)) {
+    return value as T[]
+  }
+  return value === null || value === undefined ? [] : [value]
+}
+
+/** Appends every element of `elements` to `array`, in place. */
+function pushAll<T>(array: T[], elements: T[]) {
+  for (const element of elements) {
+    array.push(element);
+  }
+}
+
+/** Maps each element to an array with `proc` and concatenates the results. */
+function flatMap<T>(array: T[], proc: (element: T) => T[]) {
+  const out: T[] = []
+  for (const element of array) {
+    pushAll(out, proc(element));
+  }
+  return out
+}
+
+/** One parsed `--hook` value: `effects` to run before or after the phase called `name`. */
+interface Hook {
+  timing: 'before' | 'after'
+  name: string
+  effects: string[]
+}
+
+/** Parses `[+]name[=effect,...]`; a leading '+' selects 'after', and a missing '=' yields no effects. */
+function parseHook(str: string): Hook {
+  let timing: 'before' | 'after' = 'before';
+  if (str[0] === '+') {
+    str = str.substring(1)
+    timing = 'after';
+  }
+  const [name, rawEffects] = str.split('=');
+  return {
+    timing,
+    name,
+    effects: rawEffects === undefined ? [] : rawEffects.split(','),
+  }
+}
+
+yargs
+  .command(
+    'compile [files..]',
+    'Compile a set of source files',
+    yargs => yargs
+      .string('hook')
+      .describe('hook', 'Add a hook to a specific compile phase. See the manual for details.'),
+
+    args => {
+      const hooks: Hook[] = toArray(args.hook as string[] | string).map(parseHook);
+
+      for (const filepath of toArray(args.files as string[] | string)) {
+        const file = new TextFile(filepath);
+        const content = fs.readFileSync(filepath, 'utf8')
+        const scanner = new Scanner(file, content)
+        const tokens: Token[] = [];
+        // Effects of 'scan' hooks with 'before' timing run before any token is read.
+        for (const hook of hooks) {
+          if (hook.name === 'scan' && hook.timing === 'before') {
+            for (const effect of hook.effects) {
+              switch (effect) {
+                case 'abort':
+                  process.exit(0);
+                  break;
+                default:
+                  throw new Error(`Could not execute hook effect '${effect}'.`);
+              }
+            }
+          }
+        }
+        // Scan the whole file; scanToken() returns null once the input is exhausted.
+        while (true) {
+          const token = scanner.scanToken()
+          if (token === null) {
+            break;
+          }
+          tokens.push(token);
+        }
+        // Effects of 'scan' hooks with 'after' timing run once every token is available.
+        for (const hook of hooks) {
+          if (hook.name === 'scan' && hook.timing === 'after') {
+            for (const effect of hook.effects) {
+              switch (effect) {
+                case 'dump':
+                  console.log(JSON.stringify(tokens.map(t => t.toJSON()), undefined, 2));
+                  break;
+                case 'abort':
+                  process.exit(0);
+                  break;
+                default:
+                  throw new Error(`Could not execute hook effect '${effect}'.`)
+              }
+            }
+          }
+        }
+      }
+    })
+
+  .help()
+  .version()
+  .argv
+
diff --git a/src/index.ts b/src/index.ts
new file mode 100644
index 000000000..b28b04f64
--- /dev/null
+++ b/src/index.ts
@@ -0,0 +1,3 @@
+
+
+
diff --git a/src/scanner.ts b/src/scanner.ts
new file mode 100644
index 000000000..5af74434d
--- /dev/null
+++ b/src/scanner.ts
@@ -0,0 +1,251 @@
+
+import XRegExp from "xregexp"
+
+import {
+  TextFile,
+  TextPos,
+  TextSpan,
+  Identifier,
+  Operator,
+  PunctType,
+  Token,
+  Punctuated,
+} from "./ast"
+
+/** Renders `ch` in a printable, escaped form for use in error messages. */
+function escapeChar(ch: string) {
+  switch (ch) {
+    case '\x07': return '\\a'; // JavaScript has no '\a' escape: '\a' is just the letter 'a'
+    case '\b': return '\\b';
+    case '\f': return '\\f';
+    case '\n': return '\\n';
+    case '\r': return '\\r';
+    case '\t': return '\\t';
+    case '\v': return '\\v';
+    case '\0': return '\\0';
+    case '\'': return '\\\'';
+    default:
+      const code = ch.charCodeAt(0);
+      if (code >= 0x20 && code <= 0x7E) {
+        return ch
+      } else if (code <= 0xFF) {
+        return `\\x${code.toString(16).padStart(2, '0')}`
+      } else {
+        return `\\u${code.toString(16).padStart(4, '0')}`
+      }
+  }
+}
+
+/** Maps an opening or closing punctuator to its PunctType; throws on any other character. */
+function getPunctType(ch: string) {
+  switch (ch) {
+    case '(':
+    case ')':
+      return PunctType.Paren;
+    case '[':
+    case ']':
+      return PunctType.Bracket;
+    case '{':
+    case '}':
+      return PunctType.Brace;
+    default:
+      throw new Error(`given character is not a valid punctuator`)
+  }
+}
+
+/** True for the closing punctuators '}', ']' and ')'. */
+function isClosePunct(ch: string) {
+  switch (ch) {
+    case '}':
+    case ']':
+    case ')':
+      return true;
+    default:
+      return false;
+  }
+}
+
+/** True for the opening punctuators '{', '(' and '['. */
+function isOpenPunct(ch: string) {
+  switch (ch) {
+    case '{':
+    case '(':
+    case '[':
+      return true;
+    default:
+      return false;
+  }
+}
+
+/** Raised by the scanner for an unexpected character; `char` is EOF ('') at end of input. */
+class ScanError extends Error {
+  constructor(public file: TextFile, public position: TextPos, public char: string) {
+    super(`${file.path}:${position.line}:${position.column}: ${char === EOF ? 'unexpected end of file' : `unexpected char '${escapeChar(char)}'`}`)
+  }
+}
+
+interface Stream<T> { // NOTE(review): not referenced anywhere in this file
+  read(): T
+}
+
+/** True for newline, carriage return, tab and Unicode space separators (Zs). */
+function isWhiteSpace(ch: string) {
+  return ch === '\n' || ch === '\r' || ch === '\t' || XRegExp('\\p{Zs}').test(ch)
+}
+
+/** True for the line-feed character, which starts a new line in position tracking. */
+function isNewLine(ch: string) {
+  return ch === '\n'
+}
+
+/** True if `ch` can start an identifier: '_' or any Unicode letter. */
+function isIdentStart(ch: string) {
+  return ch === '_' || XRegExp('\\p{L}').test(ch)
+}
+
+/** True if `ch` can continue an identifier: '_', a Unicode letter or a decimal digit. */
+function isIdentPart(ch: string) {
+  return ch === '_' || XRegExp('[\\p{L}\\p{Nd}]').test(ch)
+}
+
+/** True if `ch` can start an operator; '=' is only accepted as a later character. */
+function isOperatorStart(ch: string) {
+  return /[+\-*\/%$!><]/.test(ch)
+}
+
+/** True if `ch` can continue an operator. */
+function isOperatorPart(ch: string) {
+  return /[=+\-*\/%$!><]/.test(ch)
+}
+
+const EOF = '' // sentinel returned by readChar() once the input is exhausted
+
+/** Turns the text of one file into tokens, one `scanToken()` call at a time. */
+export class Scanner {
+
+  protected buffer: string[] = [];
+  protected currPos = new TextPos(0,1,1);
+  protected offset = 0;
+
+  constructor(public file: TextFile, public input: string) {
+  }
+
+  /** Reads the next raw character from `input`, or EOF once it is exhausted. */
+  protected readChar() {
+    if (this.offset >= this.input.length) {
+      return EOF
+    }
+    return this.input[this.offset++]
+  }
+
+  /** Returns the `count`-th upcoming character without consuming it. */
+  protected peekChar(count = 1) {
+    while (this.buffer.length < count) {
+      this.buffer.push(this.readChar());
+    }
+    return this.buffer[count - 1];
+  }
+
+  /** Consumes one character and advances `currPos`; EOF is returned without moving. */
+  protected getChar() {
+    const ch = this.buffer.length > 0
+      ? this.buffer.shift()!
+      : this.readChar()
+
+    if (ch === EOF) {
+      return EOF
+    }
+
+    if (isNewLine(ch)) {
+      this.currPos.line += 1;
+      this.currPos.column = 1;
+    } else {
+      this.currPos.column += 1;
+    }
+    this.currPos.offset += 1;
+
+    return ch
+  }
+
+  /** Consumes one character unconditionally, then every following character accepted by `pred`. */
+  protected takeWhile(pred: (ch: string) => boolean) {
+    let text = this.getChar();
+    while (true) {
+      const c0 = this.peekChar();
+      if (!pred(c0)) {
+        break;
+      }
+      this.getChar()
+      text += c0;
+    }
+    return text;
+  }
+
+  /** Scans the next token, or returns null at end of input; throws ScanError on bad input. */
+  scanToken(): Token | null {
+    while (true) {
+      const c0 = this.peekChar();
+
+      if (isWhiteSpace(c0)) {
+        this.getChar();
+        continue;
+      }
+
+      if (c0 === EOF) {
+        return null;
+      }
+
+      const startPos = this.currPos.clone()
+
+      if (isOpenPunct(c0)) {
+        this.getChar();
+
+        const punctType = getPunctType(c0);
+        const elements: Token[] = [];
+
+        while (true) {
+          const c1 = this.peekChar();
+          if (isWhiteSpace(c1)) { this.getChar(); continue; } // otherwise "( a )" fails on the ')' after the space
+          if (c1 === EOF) {
+            throw new ScanError(this.file, this.currPos.clone(), EOF)
+          }
+
+          if (isClosePunct(c1)) {
+            if (punctType === getPunctType(c1)) {
+              this.getChar();
+              break;
+            } else {
+              throw new ScanError(this.file, this.currPos.clone(), c1);
+            }
+          }
+
+          const token = this.scanToken();
+          if (token === null) {
+            throw new ScanError(this.file, this.currPos.clone(), EOF)
+          }
+          elements.push(token);
+        }
+
+        const endPos = this.currPos.clone();
+
+        return new Punctuated(punctType, elements, new TextSpan(this.file, startPos, endPos));
+      } else if (isIdentStart(c0)) {
+        const name = this.takeWhile(isIdentPart);
+        const endPos = this.currPos.clone();
+        return new Identifier(name, new TextSpan(this.file, startPos, endPos))
+      } else if (isOperatorStart(c0)) {
+        const text = this.takeWhile(isOperatorPart)
+        const endPos = this.currPos.clone()
+        return new Operator(text, new TextSpan(this.file, startPos, endPos));
+      } else {
+        throw new ScanError(this.file, this.currPos.clone(), c0);
+      }
+    }
+  }
+
+  scanTokenList() {
+    // TODO: not implemented yet; callers currently loop over scanToken() themselves.
+  }
+
+}
+
diff --git a/test/000-alpha-identifiers.bolt b/test/000-alpha-identifiers.bolt
new file mode 100644
index 000000000..583320e91
--- /dev/null
+++ b/test/000-alpha-identifiers.bolt
@@ -0,0 +1,5 @@
+foo
+bar
+ThisIsReallyALongIdentifierName
+x
+Y