Add a simple scanner for identifiers

This commit is contained in:
Sam Vervaeck 2020-02-24 18:30:39 +01:00
parent 7cbbf788f8
commit bcec19ecc8
5 changed files with 734 additions and 0 deletions

358
src/ast.ts Normal file
View file

@ -0,0 +1,358 @@
import "reflect-metadata"
// JSON-serializable value types, used as the return type of the `toJSON()`
// methods in this file. `JsonArray` and `JsonObject` are the array and object
// cases of `Json`; each refers back to `Json` for its elements/values.
// Note that `null` is not part of `Json` as declared here.
interface JsonArray extends Array<Json> { };
interface JsonObject { [key: string]: Json }
type Json = string | boolean | number | JsonArray | JsonObject;
/**
 * Numeric tag stored in the `kind` field of the token and node classes
 * declared in this file. Values are spelled out explicitly and match the
 * implicit numbering of the declaration order.
 */
export enum SyntaxKind {

  // Tokens
  Literal = 0,
  Identifier = 1,
  Operator = 2,
  Punctuated = 3,

  // Files and qualified names
  SourceFile = 4,
  QualName = 5,

  // Expressions
  ConstantExpr = 6,
  ReferenceExpr = 7,

  // Type declarations
  TypeReference = 8,

  // Declaration nodes
  VariableDecl = 9,
  FunctionDecl = 10,

}
/**
 * Bit flags describing a field in the `static META` tables of the classes in
 * this file. Flags are combined with `|`, e.g. `EdgeType.Node | EdgeType.List`.
 */
enum EdgeType {
  Primitive = 1 << 0,
  Node      = 1 << 1,
  Nullable  = 1 << 2,
  List      = 1 << 3,
}
/** A source file, identified only by its path. */
export class TextFile {

  /** Path of the file, as given to the constructor. */
  public path: string;

  constructor(path: string) {
    this.path = path;
  }

}
/** A position in a text, stored as an offset plus a line and column number. */
export class TextPos {

  public offset: number;
  public line: number;
  public column: number;

  constructor(offset: number, line: number, column: number) {
    this.offset = offset;
    this.line = line;
    this.column = column;
  }

  /** Returns a new, independent `TextPos` with the same three values. */
  clone() {
    const { offset, line, column } = this;
    return new TextPos(offset, line, column)
  }

  /** Returns a plain object with the `offset`, `line` and `column` fields. */
  toJSON(): Json {
    const { offset, line, column } = this;
    return { offset, line, column }
  }

}
/** A region of a file, delimited by a start and an end position. */
export class TextSpan {

  public file: TextFile;
  public start: TextPos;
  public end: TextPos;

  constructor(file: TextFile, start: TextPos, end: TextPos) {
    this.file = file;
    this.start = start;
    this.end = end;
  }

  /**
   * Returns a new span whose `start` and `end` are clones of this span's
   * positions. The `file` object is shared, not copied.
   */
  clone() {
    const startCopy = this.start.clone();
    const endCopy = this.end.clone();
    return new TextSpan(this.file, startCopy, endCopy);
  }

  /** Returns a plain object holding the file path and both positions as JSON. */
  toJSON(): Json {
    const file = this.file.path;
    const start = this.start.toJSON();
    const end = this.end.toJSON();
    return { file, start, end }
  }

}
/** Token holding a literal value, which is either a string or a bigint. */
export class Literal {

  kind = SyntaxKind.Literal;

  /** Edge description: `value` is a primitive field. */
  static META = {
    value: EdgeType.Primitive,
  }

  constructor(
    public value: string | bigint,
    public span: TextSpan
  ) {

  }

  /**
   * Returns a JSON representation with `kind`, `value` and `span`.
   *
   * The `kind` tag is included so the output can be told apart from the other
   * token classes, which also emit their kind as a string.
   */
  toJSON(): Json {
    return {
      kind: 'Literal',
      // `Json` has no bigint case, so bigints are converted with Number().
      // NOTE: this loses precision for values beyond Number.MAX_SAFE_INTEGER.
      value: typeof this.value === 'bigint' ? Number(this.value) : this.value,
      span: this.span.toJSON(),
    }
  }

}
/** The bracket family of a `Punctuated` token: `()`, `[]` or `{}`. */
export enum PunctType {
  Paren = 0,
  Bracket = 1,
  Brace = 2,
}
/** Token that groups the tokens found between a matching pair of brackets. */
export class Punctuated {

  kind = SyntaxKind.Punctuated

  /** Edge description: `punctuator` is a primitive, `elements` a list of nodes. */
  static META = {
    punctuator: EdgeType.Primitive,
    elements: EdgeType.Node | EdgeType.List
  }

  constructor(
    public punctuator: PunctType,
    public elements: Token[],
    public span: TextSpan
  ) {

  }

  /**
   * Returns a JSON representation with `kind`, `punctuator`, `elements` and
   * `span`. The span is included to match the other token classes, which all
   * report their source location.
   */
  toJSON(): Json {
    return {
      kind: 'Punctuated',
      // Emitted as the numeric value of the PunctType enum.
      punctuator: this.punctuator,
      elements: this.elements.map(element => element.toJSON()),
      span: this.span.toJSON(),
    }
  }

}
/** Token for an identifier; `text` holds the identifier's characters. */
export class Identifier {

  kind = SyntaxKind.Identifier;

  /** Edge description: `text` is a primitive field. */
  static META = {
    text: EdgeType.Primitive
  }

  constructor(
    public text: string,
    public span: TextSpan
  ) {

  }

  /** Returns a JSON representation with `kind`, `text` and `span`. */
  toJSON(): Json {
    const { text, span } = this;
    return { kind: 'Identifier', text, span: span.toJSON() }
  }

}
/** Token for an operator; `text` holds the operator's characters. */
export class Operator {

  kind = SyntaxKind.Operator;

  /** Edge description: `text` is a primitive field. */
  static META = {
    text: EdgeType.Primitive
  }

  constructor(
    public text: string,
    public span: TextSpan,
    public parentNode: Syntax | null = null
  ) {

  }

  /** Returns a JSON representation with `kind`, `text` and `span` (`parentNode` is not included). */
  toJSON(): Json {
    const { text, span } = this;
    return { kind: 'Operator', text, span: span.toJSON() }
  }

}
// Union of all token classes declared in this file. Every member has a
// `toJSON()` method and a `span`.
export type Token
= Identifier
| Operator
| Literal
| Punctuated
/** A qualified name: a final `name` preceded by a `path` of identifiers. */
export class QualName {

  kind = SyntaxKind.QualName;

  /**
   * Edge description. `name` is a plain string, so it is marked as a
   * primitive rather than as a child node; `path` is a list of nodes.
   */
  static META = {
    name: EdgeType.Primitive,
    path: EdgeType.Node | EdgeType.List,
  }

  constructor(
    public name: string,
    public path: Identifier[],
    public span: TextSpan,
    public parentNode: Syntax | null = null
  ) {

  }

}
// Expression node holding a constant value, which is either a string or a bigint.
export class ConstantExpr {
kind = SyntaxKind.ConstantExpr;
// Edge description: `value` is a primitive field.
static META = {
value: EdgeType.Primitive,
}
constructor(
public value: string | bigint,
public span: TextSpan,
// Defaults to null when no parent is given.
public parentNode: Syntax | null = null
) {
}
}
// Union of expression node classes; currently only ConstantExpr.
export type Expr
= ConstantExpr
// Type declaration node that refers to a type by qualified name, with a list
// of type arguments.
export class TypeReference {
kind = SyntaxKind.TypeReference;
// Edge description: `name` is a single node, `args` a list of nodes.
static META = {
name: EdgeType.Node,
args: EdgeType.Node | EdgeType.List,
}
constructor(
public name: QualName,
public args: TypeDecl[],
public span: TextSpan,
// Defaults to null when no parent is given.
public parentNode: Syntax | null = null
) {
}
}
// Union of type declaration node classes; currently only TypeReference.
export type TypeDecl
= TypeReference
// Declaration that holds a raw list of tokens.
// NOTE(review): presumably a placeholder for tokens that have not been parsed
// into a concrete declaration yet -- confirm. Unlike the other node classes it
// has no `kind` field, and SyntaxKind has no matching member.
export class Unexpanded {
// Edge description: `tokens` is a list of nodes.
static META = {
tokens: EdgeType.Node | EdgeType.List
}
constructor(
public tokens: Token[],
public span: TextSpan,
// Defaults to null when no parent is given.
public parentNode: Syntax | null = null
) {
}
}
/**
 * Declaration node for a function: a qualified name, a parameter list, an
 * optional return type and an optional body.
 */
export class FunctionDecl {

  kind = SyntaxKind.FunctionDecl;

  /**
   * Edge description. `returnType` and `body` may both be `null`, so both
   * carry the `Nullable` flag; `params` and `body` are lists of nodes.
   */
  static META = {
    name: EdgeType.Node,
    params: EdgeType.Node | EdgeType.List,
    returnType: EdgeType.Node | EdgeType.Nullable,
    body: EdgeType.Node | EdgeType.List | EdgeType.Nullable,
  }

  constructor(
    public name: QualName,
    public params: Param[],
    public returnType: TypeDecl | null,
    public body: Statement[] | null,
    public span: TextSpan,
    public parentNode: Syntax | null = null
  ) {

  }

}
/**
 * Declaration node for a variable: a binding pattern with an optional type
 * declaration and an optional initial value.
 */
export class VariableDecl {

  kind = SyntaxKind.VariableDecl;

  /** Edge description: `bindings` is a node; `typeDecl` and `value` are nullable nodes. */
  static META = {
    bindings: EdgeType.Node,
    typeDecl: EdgeType.Node | EdgeType.Nullable,
    value: EdgeType.Node | EdgeType.Nullable,
  }

  constructor(
    public bindings: Pattern,
    public typeDecl: TypeDecl | null,
    public value: Expr | null,
    public span: TextSpan,
    // Optional and last, as on the other node classes, so existing
    // four-argument callers keep working.
    public parentNode: Syntax | null = null
  ) {

  }

}
// Union of the declaration classes: the elements a SourceFile can hold.
export type Decl
= Unexpanded
| FunctionDecl
| VariableDecl
// Union of all syntax node types; this is the type of the `parentNode` field
// on the node classes in this file.
export type Syntax
= Decl
| Expr
| SourceFile
| QualName
/** Root node for one source file: the list of its top-level declarations. */
export class SourceFile {

  // Tagged like every other member of the Syntax union, so that code
  // switching on `kind` can recognise a source file.
  kind = SyntaxKind.SourceFile;

  /** Edge description: `elements` is a list of nodes. */
  static META = {
    elements: EdgeType.Node | EdgeType.List,
  }

  constructor(public elements: Decl[], public span: TextSpan) {

  }

}

117
src/bin/bolt.ts Normal file
View file

@ -0,0 +1,117 @@
#!/usr/bin/env node
import * as fs from "fs"
import yargs from "yargs"
import { Scanner } from "../scanner"
import { Token, TextFile } from "../ast"
/**
 * Normalises a value to an array.
 *
 * - An array is returned as-is (same instance).
 * - `null` and `undefined` become an empty array.
 * - Any other value is wrapped in a one-element array.
 *
 * The public signature is declared as an overload because TypeScript cannot
 * check a function body against an unresolved conditional return type; the
 * implementation signature below is deliberately loose and is not visible to
 * callers.
 */
function toArray<T>(value: T): T extends Array<any> ? T : T[];
function toArray(value: unknown): unknown {
  if (value === null || value === undefined) {
    return [];
  }
  return Array.isArray(value) ? value : [value];
}
/**
 * Appends every element of `elements` to the end of `array`, in order.
 * `array` is modified in place; nothing is returned.
 */
function pushAll<T>(array: T[], elements: T[]) {
  for (let index = 0; index < elements.length; index++) {
    array.push(elements[index]);
  }
}
/**
 * Maps every element of `array` to an array and concatenates the results, in
 * order, into one new array.
 *
 * The result element type `U` may differ from the input element type `T`; it
 * defaults to `T`, so calls that pass a single explicit type argument still
 * compile.
 *
 * @param array The input elements; not modified.
 * @param proc Called once per element; returns the elements to append.
 * @returns A new array holding all elements produced by `proc`.
 */
function flatMap<T, U = T>(array: T[], proc: (element: T) => U[]): U[] {
  const out: U[] = [];
  for (const element of array) {
    for (const mapped of proc(element)) {
      out.push(mapped);
    }
  }
  return out;
}
// A hook parsed from a `--hook` command-line value.
interface Hook {
// Whether the effects run before or after the named phase.
timing: 'before' | 'after'
// Name of the compile phase the hook attaches to; the CLI in this file only
// checks for 'scan'.
name: string
// Names of the effects to run; the CLI in this file handles 'dump' and 'abort'.
effects: string[]
}
/**
 * Parses a hook specification of the form `[+]NAME=EFFECT[,EFFECT...]`.
 *
 * A leading `+` selects the 'after' timing; without it the timing is 'before'.
 * The specification is split on the first `=` only, so any later `=` stays
 * part of the effect list instead of being dropped silently.
 *
 * @param str The raw value of a `--hook` option.
 * @returns The parsed hook.
 * @throws Error if `str` contains no `=` separator.
 */
function parseHook(str: string): Hook {
  let timing: 'before' | 'after' = 'before';
  let spec = str;
  if (spec[0] === '+') {
    spec = spec.substring(1);
    timing = 'after';
  }
  const separator = spec.indexOf('=');
  if (separator === -1) {
    // Fail with a readable message rather than a TypeError from calling
    // split() on undefined.
    throw new Error(`Invalid hook '${str}': expected [+]NAME=EFFECT[,EFFECT...]`);
  }
  return {
    timing,
    name: spec.substring(0, separator),
    effects: spec.substring(separator + 1).split(','),
  }
}
// Command-line entry point. Defines the `compile` command, which scans every
// given file into tokens and runs the `--hook` effects attached to the 'scan'
// phase. Reading `.argv` at the end is what triggers argument parsing.
yargs

  .command(

    'compile [files..]',
    'Compile a set of source files',

    yargs => yargs
      .string('hook')
      .describe('hook', 'Add a hook to a specific compile phase. See the manual for details.'),

    args => {

      // `--hook` may be absent, given once, or repeated; toArray normalises
      // all three cases to a list.
      const hooks: Hook[] = toArray(args.hook as string[] | string).map(parseHook);

      for (const filepath of toArray(args.files as string[] | string)) {

        const file = new TextFile(filepath);
        const content = fs.readFileSync(filepath, 'utf8')
        const scanner = new Scanner(file, content)
        const tokens: Token[] = [];

        // Effects that run before scanning. Only 'abort' is supported here;
        // no tokens exist yet, so there is nothing to dump.
        for (const hook of hooks) {
          if (hook.name === 'scan' && hook.timing === 'before') {
            for (const effect of hook.effects) {
              switch (effect) {
                case 'abort':
                  process.exit(0);
                  break;
                default:
                  throw new Error(`Could not execute hook effect '${effect}'.`);
              }
            }
          }
        }

        // Scan until the scanner reports the end of the input with `null`.
        while (true) {
          const token = scanner.scanToken()
          if (token === null) {
            break;
          }
          tokens.push(token);
        }

        // Effects that run after scanning.
        for (const hook of hooks) {
          if (hook.name === 'scan' && hook.timing === 'after') {
            for (const effect of hook.effects) {
              switch (effect) {
                case 'dump':
                  console.log(JSON.stringify(tokens.map(t => t.toJSON()), undefined, 2));
                  break;
                case 'abort':
                  process.exit(0);
                  break;
                default:
                  throw new Error(`Could not execute hook effect '${effect}'.`)
              }
            }
          }
        }

      }

    })

  .help()
  .version()
  .argv

3
src/index.ts Normal file
View file

@ -0,0 +1,3 @@

251
src/scanner.ts Normal file
View file

@ -0,0 +1,251 @@
import XRegExp from "xregexp"
import {
TextFile,
TextPos,
TextSpan,
Identifier,
Operator,
PunctType,
Token,
Punctuated,
} from "./ast"
/**
 * Returns a printable, escaped representation of a character, for use in
 * diagnostics.
 *
 * - Well-known control characters and the single quote map to their
 *   backslash escapes (`\n`, `\t`, `\'`, ...).
 * - Printable ASCII (0x20-0x7E) is returned unchanged.
 * - Other code units below 0x20 become `\xHH`; everything else becomes `\uHHHH`.
 * - The empty string is returned unchanged.
 *
 * Only the first UTF-16 code unit of `ch` is inspected in the fallback case.
 *
 * @param ch The character to escape.
 * @returns The escaped text.
 */
function escapeChar(ch: string): string {
  if (ch.length === 0) {
    // charCodeAt(0) would be NaN here and render as '\u0NaN'.
    return '';
  }
  switch (ch) {
    // JavaScript has no '\a' escape ('\a' is just the letter 'a'), so the
    // BEL character has to be written as '\x07'.
    case '\x07': return '\\a';
    case '\b': return '\\b';
    case '\f': return '\\f';
    case '\n': return '\\n';
    case '\r': return '\\r';
    case '\t': return '\\t';
    case '\v': return '\\v';
    case '\0': return '\\0';
    case '\'': return '\\\'';
    default: {
      const code = ch.charCodeAt(0);
      if (code >= 0x20 && code <= 0x7E) {
        return ch
      } else if (code < 0x7F) {
        return `\\x${code.toString(16).padStart(2, '0')}`
      } else {
        return `\\u${code.toString(16).padStart(4, '0')}`
      }
    }
  }
}
/**
 * Maps an opening or closing bracket character to its bracket family.
 *
 * @param ch One of `(`, `)`, `[`, `]`, `{` or `}`.
 * @returns The matching `PunctType`.
 * @throws Error if `ch` is not one of the six bracket characters.
 */
function getPunctType(ch: string) {
  if (ch === '(' || ch === ')') {
    return PunctType.Paren;
  }
  if (ch === '[' || ch === ']') {
    return PunctType.Bracket;
  }
  if (ch === '{' || ch === '}') {
    return PunctType.Brace;
  }
  throw new Error(`given character is not a valid punctuator`)
}
/** Returns true if `ch` is exactly one of the closing brackets `}`, `]` or `)`. */
function isClosePunct(ch: string) {
  return ch === '}' || ch === ']' || ch === ')';
}
/** Returns true if `ch` is exactly one of the opening brackets `{`, `(` or `[`. */
function isOpenPunct(ch: string) {
  return ch === '{' || ch === '(' || ch === '[';
}
/**
 * Error raised when the scanner meets a character it cannot handle, or runs
 * out of input where more was required.
 *
 * The message has the form `path:line:column: unexpected char 'X'`, with the
 * character escaped for display. When `char` is the empty string (the value
 * the scanner uses for end of input) the message says
 * `unexpected end of file` instead, because there is no character to show.
 */
class ScanError extends Error {

  constructor(public file: TextFile, public position: TextPos, public char: string) {
    super(
      char === ''
        ? `${file.path}:${position.line}:${position.column}: unexpected end of file`
        : `${file.path}:${position.line}:${position.column}: unexpected char '${escapeChar(char)}'`
    )
    // Makes stack traces and logs show 'ScanError' instead of 'Error'.
    this.name = 'ScanError'
  }

}
// A source of values of type T that are obtained one at a time with read().
// NOTE(review): not referenced anywhere in the visible part of this file.
interface Stream<T> {
read(): T
}
/**
 * Returns true if `ch` is whitespace that the scanner should skip.
 *
 * Line feed, tab and carriage return are checked explicitly: they are control
 * characters, not members of the Unicode space-separator category (Zs), so
 * without these checks tab-indented files and files with CRLF line endings
 * would be rejected as containing unexpected characters.
 */
function isWhiteSpace(ch: string) {
  return ch === '\n'
    || ch === '\t'
    || ch === '\r'
    || XRegExp('\\p{Zs}').test(ch)
}
/** Returns true if `ch` is the line feed character. */
function isNewLine(ch: string) {
  const LINE_FEED = '\n';
  return ch === LINE_FEED;
}
/**
 * Returns true if `ch` can start an identifier: an underscore, or anything
 * matched by the Unicode letter category (L).
 */
function isIdentStart(ch: string) {
  if (ch === '_') {
    return true;
  }
  return XRegExp('\\p{L}').test(ch);
}
/**
 * Returns true if `ch` can continue an identifier after its first character:
 * an underscore, a Unicode letter (category L) or a Unicode decimal digit
 * (category Nd).
 *
 * Digits are accepted here but not at the start of an identifier, so that
 * names such as `foo1` scan as one identifier.
 */
function isIdentPart(ch: string) {
  return ch === '_'
    || XRegExp('\\p{L}').test(ch)
    || XRegExp('\\p{Nd}').test(ch)
}
/**
 * Returns true if `ch` contains a character that can start an operator:
 * one of `+ - * / % $ ! > <`. Note that `=` is not included.
 */
function isOperatorStart(ch: string) {
  const firstMatch = ch.search(/[+\-*\/%$!><]/);
  return firstMatch !== -1;
}
/**
 * Returns true if `ch` contains a character that can continue an operator:
 * one of `= + - * / % $ ! > <`.
 */
function isOperatorPart(ch: string) {
  const firstMatch = ch.search(/[=+\-*\/%$!><]/);
  return firstMatch !== -1;
}
// Sentinel returned by the scanner's character-reading methods once the input
// is exhausted. It is the empty string, so it never equals a real character.
const EOF = ''
/**
 * Converts the text of one file into tokens.
 *
 * Characters are read from `input` one at a time. Characters that have been
 * peeked but not yet consumed are kept in `buffer`. `currPos` tracks the
 * position of the next character to be consumed.
 */
export class Scanner {

  /** Characters read from `input` by peekChar() but not yet consumed. */
  protected buffer: string[] = [];

  /** Position of the next character to consume (offset 0, line 1, column 1 at start). */
  protected currPos = new TextPos(0,1,1);

  /** Index into `input` of the next character to read. */
  protected offset = 0;

  constructor(public file: TextFile, public input: string) {

  }

  /** Reads the next character straight from `input`, or EOF when it is exhausted. */
  protected readChar() {
    if (this.offset == this.input.length) {
      return EOF
    }
    return this.input[this.offset++]
  }

  /**
   * Returns the `count`-th upcoming character (1-based) without consuming it,
   * filling the lookahead buffer as needed. Returns EOF past the end of input.
   */
  protected peekChar(count = 1) {
    while (this.buffer.length < count) {
      this.buffer.push(this.readChar());
    }
    return this.buffer[count - 1];
  }

  /**
   * Consumes and returns the next character, updating `currPos`. A line feed
   * advances the line and resets the column to 1; any other character
   * advances the column. Returns EOF, leaving the position unchanged, at the
   * end of input.
   */
  protected getChar() {
    const ch = this.buffer.length > 0
      ? this.buffer.shift()!
      : this.readChar()
    if (ch == EOF) {
      return EOF
    }
    if (isNewLine(ch)) {
      this.currPos.line += 1;
      this.currPos.column = 1;
    } else {
      this.currPos.column += 1;
    }
    this.currPos.offset += 1;
    return ch
  }

  /**
   * Consumes one character unconditionally, then keeps consuming for as long
   * as `pred` accepts the next character. Returns the consumed text.
   */
  protected takeWhile(pred: (ch: string) => boolean) {
    let text = this.getChar();
    while (true) {
      const c0 = this.peekChar();
      if (!pred(c0)) {
        break;
      }
      this.getChar()
      text += c0;
    }
    return text;
  }

  /** Consumes characters for as long as the next one is whitespace. */
  protected skipWhiteSpace() {
    while (isWhiteSpace(this.peekChar())) {
      this.getChar();
    }
  }

  /**
   * Scans and returns the next token, or `null` at the end of input.
   *
   * An opening bracket starts a `Punctuated` token that recursively holds
   * every token up to the matching closing bracket. Whitespace is skipped
   * before each token, and also before a closing bracket.
   *
   * @throws ScanError on an unexpected character, a closing bracket that does
   *   not match the opening one, or end of input inside a bracket pair.
   */
  scanToken(): Token | null {

    this.skipWhiteSpace();

    const c0 = this.peekChar();
    if (c0 == EOF) {
      return null;
    }

    const startPos = this.currPos.clone()

    if (isOpenPunct(c0)) {

      this.getChar();
      const punctType = getPunctType(c0);
      const elements: Token[] = [];

      while (true) {

        // Skip whitespace here as well; otherwise a space in front of the
        // closing bracket would make the recursive call below reject the
        // bracket as an unexpected character.
        this.skipWhiteSpace();

        const c1 = this.peekChar();

        if (c1 === EOF) {
          throw new ScanError(this.file, this.currPos.clone(), EOF)
        }

        if (isClosePunct(c1)) {
          if (punctType != getPunctType(c1)) {
            // Clone so that the error keeps the position at the time of the failure.
            throw new ScanError(this.file, this.currPos.clone(), c1);
          }
          this.getChar();
          break;
        }

        const token = this.scanToken();
        if (token === null) {
          throw new ScanError(this.file, this.currPos.clone(), EOF)
        }
        elements.push(token);

      }

      const endPos = this.currPos.clone();
      return new Punctuated(punctType, elements, new TextSpan(this.file, startPos, endPos));

    } else if (isIdentStart(c0)) {

      const name = this.takeWhile(isIdentPart);
      const endPos = this.currPos.clone();
      return new Identifier(name, new TextSpan(this.file, startPos, endPos))

    } else if (isOperatorStart(c0)) {

      const text = this.takeWhile(isOperatorPart)
      const endPos = this.currPos.clone()
      return new Operator(text, new TextSpan(this.file, startPos, endPos));

    } else {

      throw new ScanError(this.file, this.currPos.clone(), c0);

    }

  }

  /** Not implemented yet; the body is empty. */
  scanTokenList() {

  }

}

View file

@ -0,0 +1,5 @@
foo
bar
ThisIsReallyALongIdentifierName
x
Y