Integrate with new AST system

This commit is contained in:
Sam Vervaeck 2020-05-10 15:56:34 +02:00
parent 03892cf149
commit 7aa86d2789
20 changed files with 2571 additions and 811 deletions

View file

@ -1,8 +1,10 @@
TREEGEN_FILES = spec/ast.txt lib/bin/bolt-treegen.js lib/treegen/parser.js lib/treegen/index.js lib/treegen/util.js src/treegen/ast-template.js
all: lib/ast.js
bolt bundle test.bolt
lib/ast.js: spec/ast.txt lib/treegen/parser.js lib/bin/bolt-treegen.js lib/treegen
lib/ast.js: $(TREEGEN_FILES)
@echo "Generating AST definitions ..."
@mkdir -p lib/
@chmod +x lib/bin/*.js

View file

@ -1,16 +1,13 @@
@language Bolt;
@language JS;
// Bolt language AST definitions
type BoltValue = Integer | bool | String;
node FunctionBody;
enum BoltDeclarationModifiers {
Mutable = 0x1,
Public = 0x2,
IsType = 0x4,
}
node BoltToken;
node BoltStringLiteral > BoltToken {
@ -18,7 +15,7 @@ node BoltStringLiteral > BoltToken {
}
node BoltIntegerLiteral > BoltToken {
value: usize,
value: Int,
}
node BoltSymbol > BoltToken {
@ -37,6 +34,7 @@ node BoltEOS > BoltToken;
node BoltComma > BoltToken;
node BoltSemi > BoltToken;
node BoltColon > BoltToken;
node BoltDot > BoltToken;
node BoltDotDot > BoltToken;
node BoltRArrow > BoltToken;
@ -63,8 +61,6 @@ node BoltParenthesized > BoltPunctuated;
node BoltBraced > BoltPunctuated;
node BoltBracketed > BoltPunctuated;
node BoltSourceElement;
node BoltSourceFile {
elements: Vec<BoltSourceElement>,
}
@ -74,6 +70,10 @@ node BoltQualName {
name: BoltSymbol,
}
node BoltSentence > BoltSourceElement {
tokens: Vec<BoltToken>,
}
node BoltTypeNode;
node BoltReferenceTypeNode > BoltTypeNode {
@ -88,7 +88,7 @@ node BoltBindPattern > BoltPattern {
}
node BoltTypePattern > BoltPattern {
typeNode: BoltTypeNode,
type: BoltTypeNode,
nestedPattern: BoltPattern,
}
@ -111,11 +111,16 @@ node BoltRecordPatternField {
}
node BoltRecordPattern > BoltPattern {
name: BoltTypeNode,
fields: Vec<BoltRecordPatternField>,
}
node BoltExpression;
node BoltReferenceExpression > BoltExpression {
name: BoltQualName,
}
node BoltCallExpression > BoltExpression {
operator: BoltExpression,
operands: Vec<BoltExpression>,
@ -152,7 +157,7 @@ node BoltConstantExpression > BoltExpression {
value: BoltValue,
}
node BoltStatement;
node BoltStatement > BoltSourceElement;
node BoltReturnStatement > BoltStatement {
value: Option<BoltExpression>,
@ -166,40 +171,52 @@ node BoltExpressionStatement > BoltStatement {
expression: BoltExpression,
}
node BoltModule {
modifiers: BoltDeclarationModifiers,
name: BoltQualName,
elements: BoltSourceElement,
}
node BoltParameter {
index: usize,
bindings: BoltPattern,
typeNode: Option<BoltTypeNode>,
type: Option<BoltTypeNode>,
defaultValue: Option<BoltExpression>,
}
node BoltDeclaration;
node BoltDeclaration > BoltSourceElement;
enum BoltDeclarationModifiers {
Mutable = 0x1,
Public = 0x2,
IsType = 0x4,
}
node BoltNewTypeDeclaration > BoltDeclaration {
modifiers: BoltDeclarationModifiers,
name: BoltIdentifier,
}
node BoltModule > BoltDeclaration {
modifiers: BoltDeclarationModifiers,
name: BoltQualName,
elements: Vec<BoltSourceElement>,
}
node BoltFunctionDeclaration > BoltDeclaration {
modifiers: BoltDeclarationModifiers,
name: BoltSymbol,
params: Vec<BoltParameter>,
type: Option<BoltTypeNode>,
returnType: Option<BoltTypeNode>,
body: BoltExpression,
}
node BoltForeignFunctionDeclaration > BoltDeclaration {
modifiers: BoltDeclarationModifiers,
target: String,
name: BoltSymbol,
params: Vec<BoltParameter>,
type: Option<BoltTypeNode>,
returnType: Option<BoltTypeNode>,
body: FunctionBody,
}
node BoltVariableDeclaration > BoltDeclaration {
modifiers: BoltDeclarationModifiers,
name: BoltSymbol,
bindings: BoltPattern,
type: Option<BoltTypeNode>,
value: Option<BoltExpression>,
}
@ -220,6 +237,8 @@ node BoltRecordDeclarationField {
type: BoltTypeNode,
}
node BoltSourceElement;
node BoltRecordDeclaration > BoltDeclaration {
name: BoltQualName,
fields: Vec<BoltRecordDeclarationField>,
@ -241,8 +260,8 @@ node JSIdentifier > JSToken {
node JSPattern;
node JSBindPattern {
name: JSIdentifier,
node JSBindPattern > JSPattern {
name: String,
}
node JSExpression;
@ -298,14 +317,46 @@ node JSReferenceExpression > JSExpression {
node JSStatement;
node JSExpressionStatement > JSStatement {
expression: JSExpression,
}
node JSConditionalStatement > JSStatement {
test: JSExpression,
consequent: Vec<JSStatement>,
alternate: Vec<JSStatement>,
}
node JSParameter {
index: usize,
bindings: JSPattern,
defaultValue: Option<JSExpression>,
}
node JSDeclaration;
enum JSDeclarationModifiers {
IsExported = 0x1,
}
node JSFunctionDeclaration > JSDeclaration {
modifiers: JSDeclarationModifiers,
name: JSIdentifier,
params: Vec<JSParameter>,
body: Vec<JSStatement>,
}
node JSArrowFunctionDeclaration > JSDeclaration {
name: JSIdentifier,
params: Vec<JSParameter>,
body: JSExpression,
}
node JSLetDeclaration > JSDeclaration {
bindings: JSPattern,
value: Option<JSExpression>,
}
node JSSourceFile {
elements: Vec<JSSourceElement>,
}

1153
src/ast.d.ts vendored

File diff suppressed because it is too large Load diff

View file

@ -4,8 +4,9 @@ import * as path from "path"
import * as fs from "fs"
import { parse, SyntaxError } from "../treegen/parser"
import { Syntax, Declaration, NodeDeclaration, TypeDeclaration, EnumDeclaration, TypeNode, NodeField } from "../ast"
import { FileWriter } from "../util"
import { Declaration } from "../treegen/ast"
import { generateAST } from "../treegen/index"
import { getFileStem } from "../util"
import minimist from "minimist"
const PACKAGE_ROOT = path.join(__dirname, '..', '..');
@ -33,205 +34,3 @@ for (const filename of argv._) {
fs.writeFileSync(dtsFilePath, dtsFile, 'utf8');
}
interface FastStringMap<T> { [key: string]: T }
/**
 * Generates the AST implementation (`jsFile`) and its TypeScript declarations
 * (`dtsFile`) from the declarations parsed out of the AST definition files.
 *
 * Every declaration receives an `index` equal to its position in `decls`.
 * Only leaf nodes (node declarations that no other node names as a parent)
 * are emitted into `NODE_TYPES`, `SyntaxKind` and the `create*` signatures.
 *
 * @param decls All parsed declarations; each one is mutated to carry `index`.
 * @returns The generated JavaScript text and declaration-file text.
 * @throws Error when a parent name does not resolve to a node declaration, or
 *   when a field refers to a type name that cannot be mapped to TypeScript.
 */
function generateAST(decls: Declaration[]) {

  const jsFile = new FileWriter();
  const dtsFile = new FileWriter();

  // Only node declarations take part in the code generation below.
  const nodeDecls: NodeDeclaration[] = decls.filter(decl => decl.type === 'NodeDeclaration') as NodeDeclaration[];

  // Index every declaration by name and remember its position.
  // Object.create(null) keeps inherited keys such as 'toString' out of the map.
  const declByName: FastStringMap<Declaration> = Object.create(null);
  let nextIndex = 0;
  for (const decl of decls) {
    decl.index = nextIndex++;
    declByName[decl.name] = decl;
  }

  // Generate a mapping from parent node to child node.
  // This makes it easy to generate union types for the intermediate nodes.
  const childrenOf: FastStringMap<string[]> = Object.create(null);
  for (const nodeDecl of nodeDecls) {
    for (const parentName of nodeDecl.parents) {
      if (childrenOf[parentName] === undefined) {
        childrenOf[parentName] = [];
      }
      childrenOf[parentName].push(nodeDecl.name);
    }
  }

  // After we're done mapping parents to children, we can use isLeafNode()
  // to store the nodes we will be iterating most frequently on.
  const leafNodes: NodeDeclaration[] = nodeDecls.filter(decl => isLeafNode(decl.name));

  // Write a JavaScript file that contains all AST definitions.
  jsFile.write(`\nconst NODE_TYPES = {\n`);
  jsFile.indent();
  for (const decl of leafNodes) {
    // leafNodes only holds leaf node declarations, so no re-check is needed here.
    jsFile.write(`'${decl.name}': {\n`);
    jsFile.indent();
    jsFile.write(`index: ${decl.index},\n`);
    jsFile.write(`fields: new Map([\n`);
    jsFile.indent();
    for (const field of getAllFields(decl)) {
      jsFile.write(`['${field.name}', ${JSON.stringify(jsonify(field.typeNode))}],\n`);
    }
    jsFile.dedent();
    jsFile.write(']),\n');
    jsFile.dedent();
    jsFile.write('},\n');
  }
  jsFile.dedent();
  jsFile.write('};\n\n');

  // The hand-written runtime template is appended verbatim after NODE_TYPES.
  jsFile.write(fs.readFileSync(path.join(PACKAGE_ROOT, 'src', 'treegen', 'ast-template.js'), 'utf8'));

  jsFile.write(`if (typeof module !== 'undefined') {\n module.exports = exported;\n}\n\n`)

  // Write corresponding TypeScript declarations
  dtsFile.write(`\nexport const enum SyntaxKind {\n`);
  for (const decl of leafNodes) {
    // Enum members must be comma-separated; without the comma the generated
    // declaration file fails to parse as soon as there are two leaf nodes.
    dtsFile.write(` ${decl.name} = ${decl.index},\n`);
  }
  dtsFile.write(`}\n\n`);

  // NOTE(review): the emitted signatures carry parameter initializers and no
  // return type; confirm this is what the consumers of the .d.ts expect.
  for (const decl of leafNodes) {
    dtsFile.write(`export function create${decl.name}(`);
    for (const field of getAllFields(decl)) {
      dtsFile.write(`${field.name}: ${emitTypeScriptType(field.typeNode)}, `);
    }
    dtsFile.write(`span: TextSpan | null = null, origNodes: SyntaxRange | null = null);\n`);
  }

  return {
    jsFile: jsFile.currentText,
    dtsFile: dtsFile.currentText,
  };

  // Below are some useful functions (function declarations are hoisted, so
  // they may be used above even though they follow the return statement).

  /** Tells whether the definition files declared something called `name`. */
  function hasDeclarationNamed(name: string): boolean {
    return name in declByName;
  }

  /** Maps a type node of the definition language to TypeScript source text. */
  function emitTypeScriptType(typeNode: TypeNode): string {
    if (typeNode.type === 'ReferenceTypeNode') {
      if (hasDeclarationNamed(typeNode.name)) {
        // Declared names win over the built-in names handled below.
        return typeNode.name;
      } else if (typeNode.name === 'Option') {
        return `${emitTypeScriptType(typeNode.typeArgs[0])} | null`;
      } else if (typeNode.name === 'Vec') {
        return `${emitTypeScriptType(typeNode.typeArgs[0])}[]`;
      } else if (typeNode.name === 'String') {
        return `string`;
      } else if (typeNode.name === 'Int') {
        return `bigint`;
      } else if (typeNode.name === 'usize') {
        return `number`;
      } else if (typeNode.name === 'bool') {
        return `boolean`;
      } else {
        throw new Error(`Could not emit TypeScript type for reference type node named ${typeNode.name}`);
      }
    } else if (typeNode.type === 'UnionTypeNode') {
      return typeNode.elements.map(emitTypeScriptType).join(' | ');
    }
    // NOTE(review): any other kind of type node falls through and yields
    // undefined; the throw below was disabled in the original code.
    //throw new Error(`Could not emit TypeScript type for type node ${typeNode.type}`);
  }

  /** Collects the node's own fields followed by those of all its ancestors. */
  function getAllFields(nodeDecl: NodeDeclaration) {
    let out: NodeField[] = [];
    pushAll(out, nodeDecl.fields);
    for (const parentName of nodeDecl.parents) {
      const parentDecl = getDeclarationNamed(parentName);
      if (parentDecl.type !== 'NodeDeclaration') {
        throw new Error(`Parent declaration '${parentName}' of '${nodeDecl.name}' must be a node declaration.`);
      }
      pushAll(out, getAllFields(parentDecl));
    }
    return out;
  }

  /** Looks a declaration up by name, throwing when it does not exist. */
  function getDeclarationNamed(name: string): Declaration {
    const decl = declByName[name];
    if (decl === undefined) {
      throw new Error(`Declaration '${name}' was not found in any of the definition files.`);
    }
    return decl;
  }

  /** A leaf node is a node declaration that no other node lists as a parent. */
  function isLeafNode(name: string): boolean {
    const decl = getDeclarationNamed(name);
    if (decl.type !== 'NodeDeclaration') {
      throw new Error(`Declaration '${name}' is not a node declaration.`)
    }
    return childrenOf[name] === undefined || childrenOf[name].length === 0;
  }

}
/**
 * Appends every element of `els` to the end of `arr`, in order.
 * `arr` is modified in place; nothing is returned.
 */
function pushAll<T>(arr: T[], els: T[]): void {
  // The length is re-read on every step, just like array iteration does.
  for (let position = 0; position < els.length; position++) {
    arr.push(els[position]);
  }
}
/**
 * Type guard for AST nodes: a value counts as a node when it is a non-null
 * object whose `__IS_NODE` property is truthy.
 *
 * Note that for objects the raw `__IS_NODE` value is returned as-is.
 */
function isNode(value: any): value is Syntax {
  if (typeof value !== 'object') {
    return false;
  }
  if (value === null) {
    return false;
  }
  return value.__IS_NODE;
}
/**
 * Converts a value into a plain structure suitable for JSON.stringify.
 *
 * Nodes (as recognised by isNode) become plain objects without their `type`,
 * `span` and `__IS_NODE` properties; array-valued properties are converted
 * element by element. Anything that is not a node is returned unchanged.
 */
function jsonify(value: any) {

  // Converts a single value, recursing only when it is a node.
  const convert = (candidate: any): any =>
    isNode(candidate) ? convertNode(candidate) : candidate;

  // Copies the serialisable properties of a node onto a fresh object.
  const convertNode = (node: any): any => {
    const plain: any = {};
    for (const key of Object.keys(node)) {
      if (key === 'type' || key === 'span' || key === '__IS_NODE') {
        continue;
      }
      const fieldValue = node[key];
      plain[key] = Array.isArray(fieldValue)
        ? fieldValue.map(element => convert(element))
        : convert(fieldValue);
    }
    return plain;
  };

  return convert(value);
}
/**
 * Returns `str` without the trailing `suffix`.
 * When `str` does not end with `suffix` it is returned unchanged.
 */
function stripSuffix(str: string, suffix: string): string {
  return str.endsWith(suffix)
    ? str.slice(0, str.length - suffix.length)
    : str;
}
/**
 * Returns the part of the file name that precedes its first dot, with all
 * directories removed. A name without a dot is returned whole.
 */
function getFileStem(filepath: string) {
  const fileName = path.basename(filepath);
  const firstDot = fileName.indexOf('.');
  return firstDot === -1 ? fileName : fileName.slice(0, firstDot);
}

View file

@ -9,7 +9,7 @@ import * as fs from "fs-extra"
import yargs from "yargs"
import { Program } from "../program"
import { TextFile } from "../ast"
import { TextFile } from "../text"
global.debug = function (value: any) {
console.error(require('util').inspect(value, { depth: Infinity, colors: true }))
@ -70,7 +70,7 @@ yargs
args => {
const files = toArray(args.path as string[] | string).map(filepath => new TextFile(filepath, args['work-dir']));
const files = toArray(args.files as string[] | string).map(filepath => new TextFile(filepath, args['work-dir']));
const program = new Program(files)
program.compile("JS");

View file

@ -1,12 +1,13 @@
import {
Syntax,
kindToString,
SyntaxKind,
ImportDecl,
Patt,
BoltImportDeclaration,
BoltPattern,
} from "./ast"
import { FastStringMap } from "./util"
import { FastStringMap, getFullTextOfQualName } from "./util"
export class Type {
@ -91,14 +92,14 @@ function getFullName(node: Syntax) {
let curr: Syntax | null = node;
while (true) {
switch (curr.kind) {
case SyntaxKind.Identifier:
case SyntaxKind.BoltIdentifier:
out.unshift(curr.text)
break;
case SyntaxKind.Module:
out.unshift(curr.name.fullText);
case SyntaxKind.BoltModule:
out.unshift(getFullTextOfQualName(curr.name));
break;
case SyntaxKind.RecordDecl:
out.unshift(curr.name.fullText)
case SyntaxKind.BoltRecordDeclaration:
out.unshift(getFullTextOfQualName(curr.name))
break;
}
curr = curr.parentNode;
@ -118,7 +119,7 @@ export class TypeChecker {
constructor() {
}
protected inferTypeFromUsage(bindings: Patt, body: Body) {
protected inferTypeFromUsage(bindings: BoltPattern, body: Body) {
return anyType;
}
@ -130,18 +131,18 @@ export class TypeChecker {
switch (node.kind) {
case SyntaxKind.RefExpr:
case SyntaxKind.BoltReferenceExpression:
return anyType;
case SyntaxKind.ConstExpr:
case SyntaxKind.BoltConstantExpression:
return node.value.type;
case SyntaxKind.NewTypeDecl:
case SyntaxKind.BoltNewTypeDeclaration:
console.log(getFullName(node.name))
this.symbols[getFullName(node.name)] = new PrimType();
return noneType;
case SyntaxKind.FuncDecl:
case SyntaxKind.BoltFunctionDeclaration:
let returnType = anyType;
if (node.returnType !== null) {
returnType = this.getTypeOfNode(node.returnType)
@ -161,29 +162,30 @@ export class TypeChecker {
})
return new FunctionType(paramTypes, returnType);
case SyntaxKind.TypeRef:
const reffed = this.getTypeNamed(node.name.fullText);
case SyntaxKind.BoltReferenceTypeNode:
const name = getFullTextOfQualName(node.name);
const reffed = this.getTypeNamed(name);
if (reffed === null) {
throw new Error(`Could not find a type named '${node.name.fullText}'`);
throw new Error(`Could not find a type named '${name}'`);
}
return reffed;
case SyntaxKind.RecordDecl:
case SyntaxKind.BoltRecordDeclaration:
const typ = new RecordType(map(node.fields, ([name, typ]) => ([name.text, typ])));
const typ = new RecordType(map(node.fields, field => ([field.name.text, this.getTypeOfNode(field.type)])));
this.symbols[getFullName(node)] = typ;
return typ;
case SyntaxKind.Param:
if (node.typeDecl !== null) {
return this.getTypeOfNode(node.typeDecl)
case SyntaxKind.BoltParameter:
if (node.typeNode !== null) {
return this.getTypeOfNode(node.typeNode)
}
return anyType;
default:
throw new Error(`Could not derive type of ${SyntaxKind[node.kind]}`)
throw new Error(`Could not derive type of ${kindToString(node.kind)}`)
}
@ -210,12 +212,12 @@ export class TypeChecker {
switch (node.kind) {
case SyntaxKind.Sentence:
case SyntaxKind.RecordDecl:
case SyntaxKind.NewTypeDecl:
case SyntaxKind.BoltSentence:
case SyntaxKind.BoltRecordDeclaration:
case SyntaxKind.BoltNewTypeDeclaration:
break;
case SyntaxKind.FuncDecl:
case SyntaxKind.BoltFunctionDeclaration:
if (node.body !== null) {
if (Array.isArray(node.body)) {
for (const element of node.body) {
@ -225,30 +227,30 @@ export class TypeChecker {
}
break;
case SyntaxKind.RefExpr:
case SyntaxKind.BoltReferenceExpression:
// TODO implement this
break;
case SyntaxKind.Module:
case SyntaxKind.SourceFile:
case SyntaxKind.BoltModule:
case SyntaxKind.BoltSourceFile:
for (const element of node.elements) {
this.check(element)
}
break;
default:
throw new Error(`Could not type-check node ${SyntaxKind[node.kind]}`)
throw new Error(`Could not type-check node ${kindToString(node.kind)}`)
}
}
getImportedSymbols(node: ImportDecl) {
getImportedSymbols(node: BoltImportDeclaration) {
return [{ name: 'fac' }]
}
getScope(node: Syntax): Scope {
while (node.kind !== SyntaxKind.FuncDecl && node.kind !== SyntaxKind.SourceFile) {
while (node.kind !== SyntaxKind.BoltFunctionDeclaration && node.kind !== SyntaxKind.BoltSourceFile) {
node = node.parentNode!;
}
if (this.scopes.has(node)) {

View file

@ -9,13 +9,31 @@ import {
import {
Syntax,
SyntaxKind,
SourceFile,
Stmt,
Expr,
Decl,
isExpr,
kindToString,
BoltSourceFile,
BoltStatement,
BoltExpression,
BoltDeclaration,
BoltBindPattern,
JSExpression,
JSStatement,
JSSourceElement,
createJSExpressionStatement,
createJSSourceFile,
createJSCallExpression,
createJSReferenceExpression,
createJSConstantExpression,
createJSLetDeclaration,
createJSIdentifier,
createJSFunctionDeclaration,
isBoltExpression,
createJSBindPattern,
JSDeclarationModifiers,
JSParameter,
} from "./ast"
import { getFullTextOfQualName, hasPublicModifier } from "./util"
import { Program } from "./program"
export interface CompilerOptions {
@ -33,22 +51,16 @@ export class Compiler {
readonly target: string;
constructor(public program: Program, public checker: TypeChecker, options: CompilerOptions) {
this.target = options.target
this.target = options.target;
}
compile(files: SourceFile[]) {
compile(files: BoltSourceFile[]) {
return files.map(s => {
const body: (Decl | Stmt | Expr)[] = [];
const body: JSSourceElement[] = [];
for (const element of s.elements) {
this.compileDecl(element, body);
}
return {
type: 'Program',
body,
loc: {
source: s.span!.file.path
}
}
return createJSSourceFile(body, s.span, [s, s]);
});
}
@ -56,112 +68,98 @@ export class Compiler {
switch (node.kind) {
case SyntaxKind.CallExpr:
case SyntaxKind.BoltCallExpression:
const compiledOperator = this.compileExpr(node.operator, preamble);
const compiledArgs = node.args.map(a => this.compileExpr(a, preamble))
return {
type: 'CallExpression',
callee: compiledOperator,
arguments: compiledArgs,
};
const compiledArgs = node.operands.map(a => this.compileExpr(a, preamble))
return createJSCallExpression(
compiledOperator,
compiledArgs,
node.span,
[node, node],
);
case SyntaxKind.RefExpr:
return {
type: 'Identifier',
name: node.name.name.text,
}
case SyntaxKind.BoltReferenceExpression:
return createJSReferenceExpression(
getFullTextOfQualName(node.name),
node.span,
[node, node],
);
case SyntaxKind.ConstExpr:
return {
type: 'Literal',
value: node.value,
}
case SyntaxKind.BoltConstantExpression:
return createJSConstantExpression(
node.value,
node.span,
[node, node]
);
default:
throw new Error(`Could not compile expression node ${SyntaxKind[node.kind]}`)
throw new Error(`Could not compile expression node ${kindToString(node.kind)}`)
}
}
protected compileDecl(node: Syntax, preamble: Syntax[]): Expr | undefined {
protected compileDecl(node: Syntax, preamble: Syntax[]) {
console.log(`compiling ${SyntaxKind[node.kind]}`)
console.log(`compiling ${kindToString(node.kind)}`)
if (isExpr(node)) {
const compiled = this.compileExpr(node, preamble);
preamble.push({
type: 'ExpressionStatement',
expression: compiled
})
return;
}
//if (isBoltExpression(node)) {
// const compiled = this.compileExpr(node, preamble);
// preamble.push(createJSExpressionStatement(compiled));
// return;
//}
switch (node.kind) {
case SyntaxKind.Module:
case SyntaxKind.BoltModule:
for (const element of node.elements) {
this.compileDecl(element, preamble);
}
break;
case SyntaxKind.ImportDecl:
preamble.push({
type: 'ImportDeclaration',
source: { type: 'Literal', value: node.file + '.mjs' },
specifiers: this.checker.getImportedSymbols(node).map(s => ({
type: 'ImportSpecifier',
imported: { type: 'Identifier', name: s.name },
local: { type: 'Identifier', name: s.name },
})),
});
case SyntaxKind.BoltExpressionStatement:
preamble.push(this.compileExpr(node.expression, preamble));
break;
case SyntaxKind.VarDecl:
case SyntaxKind.BoltImportDeclaration:
// TODO
break;
case SyntaxKind.BoltVariableDeclaration:
const compiledValue = node.value !== null ? this.compileExpr(node.value, preamble) : null;
preamble.push({
type: 'VariableDeclaration',
kind: 'let',
declarations: [{
type: 'VariableDeclarator',
id: { type: 'Identifier', name: node.bindings.name.text },
init: compiledValue
}]
});
preamble.push(
createJSLetDeclaration(
createJSBindPattern((node.bindings as BoltBindPattern).name, node.bindings.span, [node.bindings, node.bindings]),
compiledValue,
node.span,
[node, node],
),
);
break;
case SyntaxKind.FuncDecl:
const params = [];
if (node.body !== null) {
let body;
if (node.target === this.target) {
body = node.body;
} else if (node.target === 'Bolt') {
let body: Stmt[] = [];
for (const stmt in node.body) {
this.compileDecl(stmt, body)
}
}
let result = {
type: 'FunctionDeclaration',
id: { type: 'Identifier', name: node.name.name.text },
params: node.params.map(p => ({ type: 'Identifier', name: p.bindings.name.text })),
body: {
type: 'BlockStatement',
body: node.body
}
}
if (node.isPublic) {
result = {
type: 'ExportNamedDeclaration',
declaration: result,
case SyntaxKind.BoltForeignFunctionDeclaration:
if (node.target === this.target && node.body !== null) {
const params: JSParameter[] = [];
let body: JSStatement[] = [];
for (const param of node.params) {
params.push(this.compilePattern(param.bindings, body));
}
let result = createJSFunctionDeclaration(
0,
createJSIdentifier(node.name.text, node.name.span, [node.name, node.name]),
params,
body,
node.span,
[node, node],
);
if (hasPublicModifier(node)) {
result.modifiers |= JSDeclarationModifiers.IsExported;;
}
preamble.push(result)
}
break;
default:
throw new Error(`Could not compile node ${SyntaxKind[node.kind]}`);
throw new Error(`Could not compile node ${kindToString(node.kind)}`);
}

View file

@ -1,5 +1,5 @@
import { Syntax, SyntaxKind, isJSNode } from "./ast"
import { Syntax, SyntaxKind, kindToString } from "./ast"
export class Emitter {
@ -7,7 +7,7 @@ export class Emitter {
switch (node.kind) {
case SyntaxKind.SourceFile:
case SyntaxKind.JSSourceFile:
let out = ''
for (const element of node.elements) {
out += this.emit(element);
@ -15,7 +15,7 @@ export class Emitter {
return out;
default:
throw new Error(`Could not emit source code for ${SyntaxKind[node.kind]}`)
throw new Error(`Could not emit source code for ${kindToString(node.kind)}`)
}

View file

@ -3,8 +3,13 @@
// easily specify how the next expansion should happen. Just a thought.
import {
SyntaxKind,
kindToString,
BoltSyntax,
BoltSentence,
createBoltEOS,
createBoltRecordPattern,
createBoltExpressionPattern,
createBoltIdentifier,
createBoltReferenceTypeNode,
createBoltConstantExpression,
@ -12,22 +17,41 @@ import {
createBoltQualName,
createBoltTypePattern,
createBoltBindPattern,
createBoltMatchExpression,
createBoltMatchArm,
createBoltModule,
createBoltSourceFile,
BoltPattern,
BoltSourceElement,
BoltReferenceTypeNode,
createBoltRecordDeclaration,
createBoltRecordDeclarationField,
isBoltSourceElement,
createBoltExpressionStatement,
isBoltExpression,
} from "./ast"
import { BoltTokenStream } from "./util"
import { TextSpan } from "./text"
import { TypeChecker } from "./checker"
import { Parser, ParseError } from "./parser"
import { Evaluator, TRUE, FALSE } from "./evaluator"
import { StreamWrapper, setOrigNodeRange, BoltTokenStream } from "./util"
interface Transformer {
pattern: BoltPattern;
transform: (node: BoltTokenStream) => BoltSyntax;
}
function createSimpleBoltReferenceTypeNode(text: string) {
/**
 * Wraps the tokens of a sentence in a StreamWrapper.
 *
 * The second argument builds a BoltEOS token whose span starts and ends at
 * the end position of the sentence's own span.
 */
function createTokenStream(node: BoltSentence) {
  // Built lazily: the sentence's span is only read when the callback runs.
  const createEndOfStream = () => {
    const file = node.span!.file;
    const start = node.span!.end.clone();
    const end = node.span!.end.clone();
    return createBoltEOS(new TextSpan(file, start, end));
  };
  return new StreamWrapper(node.tokens, createEndOfStream);
}
function createSimpleBoltReferenceTypeNode(text: string): BoltReferenceTypeNode {
const ids = text.split('.').map(name => createBoltIdentifier(name))
return createBoltReferenceTypeNode(createBoltQualName(ids[ids.length-1], ids.slice(0, -1)), [])
return createBoltReferenceTypeNode(createBoltQualName(ids.slice(0, -1), ids[ids.length-1]), [])
}
/// This is actually a hand-parsed version of the following:
@ -44,33 +68,34 @@ function createSimpleBoltReferenceTypeNode(text: string) {
/// }
/// ],
/// }
const PATTERN_SYNTAX: Pattern =
createBoltRecordPattern(
createSimpleBoltReferenceTypeNode('Bolt.AST.Sentence'),
[{
name: createBoltIdentifier('elements'),
pattern: createBoltTuplePattern([
createBoltRecordPattern(
createSimpleBoltReferenceTypeNode('Bolt.AST.Identifier'),
[{
name: createBoltIdentifier('text'),
pattern: createBoltConstantExpression('syntax')
}]
),
createBoltRecordPattern(
createSimpleBoltReferenceTypeNode('Bolt.AST.Braced'),
[{
name: createBoltIdentifier('elements'),
pattern: createBoltTuplePattern([
createBoltTypePattern(createSimpleBoltReferenceTypeNode('Bolt.AST.Pattern'), createBoltBindPattern(createBoltIdentifier('pattern'))),
createBoltTypePattern(createSimpleBoltReferenceTypeNode('Bolt.AST.RArrow'), createBoltBindPattern(createBoltIdentifier('_'))),
createBoltTypePattern(createSimpleBoltReferenceTypeNode('Bolt.AST.Expr'), createBoltBindPattern(createBoltIdentifier('expression')))
])
}]
)
])
}]
)
//const PATTERN_SYNTAX: BoltPattern =
// createBoltRecordPattern(
// createSimpleBoltReferenceTypeNode('Bolt.AST.Sentence'),
// [
// createBoltRecordDeclarationField(
// createBoltIdentifier('elements'),
// createBoltTuplePattern([
// createBoltRecordPattern(
// createSimpleBoltReferenceTypeNode('Bolt.AST.Identifier'),
// [{
// name: createBoltIdentifier('text'),
// pattern: createBoltConstantExpression('syntax')
// }]
// ),
// createBoltRecordPattern(
// createSimpleBoltReferenceTypeNode('Bolt.AST.Braced'),
// [{
// name: createBoltIdentifier('elements'),
// pattern: createBoltTuplePattern([
// createBoltTypePattern(createSimpleBoltReferenceTypeNode('Bolt.AST.Pattern'), createBoltBindPattern(createBoltIdentifier('pattern'))),
// createBoltTypePattern(createSimpleBoltReferenceTypeNode('Bolt.AST.RArrow'), createBoltBindPattern(createBoltIdentifier('_'))),
// createBoltTypePattern(createSimpleBoltReferenceTypeNode('Bolt.AST.Expr'), createBoltBindPattern(createBoltIdentifier('expression')))
// ])
// }]
// )
// ])
// )]
// )
export class Expander {
@ -83,58 +108,80 @@ export class Expander {
// })
}
getFullyExpanded(node: Syntax): Syntax {
getFullyExpanded(node: BoltSyntax): BoltSyntax {
if (node.kind === SyntaxKind.SourceFile) {
if (node.kind === SyntaxKind.BoltSourceFile) {
const expanded: (Decl | Stmt)[] = [];
const expanded: BoltSourceElement[] = [];
let didExpand = false;
for (const element of node.elements) {
let newElement = this.getFullyExpanded(element);
// Automatically lift top-level expressions into expression statements so that they are valid.
if (isBoltExpression(newElement)) {
newElement = createBoltExpressionStatement(newElement);
}
// From this point, newElement really should be a BoltSourceElement
if (!isBoltSourceElement(newElement)) {
throw new Error(`Expanded element ${kindToString(newElement.kind)} is not valid in a top-level context.`);
}
if (newElement !== element) {
didExpand = true;
}
expanded.push(newElement as Decl | Stmt)
expanded.push(newElement);
}
if (!didExpand) {
return node;
}
return new SourceFile(expanded, null, node);
const newSourceFile = createBoltSourceFile(expanded);
setOrigNodeRange(newSourceFile, node, node);
return newSourceFile;
} else if (node.kind == SyntaxKind.Module) {
} else if (node.kind == SyntaxKind.BoltModule) {
const expanded = [];
const expanded: BoltSourceElement[] = [];
let didExpand = false;
for (const element of node.elements) {
let newElement = this.getFullyExpanded(element);
if (!isBoltSourceElement(newElement)) {
throw new Error(`Expanded element is invalid in a module context.`);
}
if (newElement !== element) {
didExpand = true;
}
expanded.push(newElement as Decl | Stmt)
expanded.push(newElement);
}
if (!didExpand) {
return node;
}
return new Module(node.isPublic, node.name, expanded, null, node);
const newModule = createBoltModule(0, node.name, expanded);
setOrigNodeRange(newModule, node, node);
return newModule;
} else if (node.kind === SyntaxKind.Sentence) {
} else if (node.kind === SyntaxKind.BoltSentence) {
let newElement;
const tokens = node.toTokenStream();
const tokens = createTokenStream(node);
try {
newElement = this.parser.parseSourceElement(tokens)
setOrigNodeRange(newElement, node, node);
} catch (e) {
@ -148,12 +195,18 @@ export class Expander {
while (true) {
let didExpand = false;
const expanded: Syntax[] = [];
const tokens = node.toTokenStream();
const expanded: BoltSyntax[] = [];
const tokens = createTokenStream(node);
for (const transformer of this.transformers) {
if (this.evaluator.eval(new MatchExpr(new ConstExpr(this.evaluator.createValue(node)), [
[transformer.pattern, new ConstExpr(TRUE)],
[new ConstExpr(TRUE), new ConstExpr(FALSE)]
if (this.evaluator.eval(createBoltMatchExpression(createBoltConstantExpression(this.evaluator.createValue(node)), [
createBoltMatchArm(
transformer.pattern,
createBoltConstantExpression(TRUE)
),
createBoltMatchArm(
createBoltExpressionPattern(createBoltConstantExpression(TRUE)),
createBoltConstantExpression(FALSE)
)
]))) {
expanded.push(transformer.transform(tokens))
didExpand = true;

29
src/parsegen/parser.d.ts vendored Normal file
View file

@ -0,0 +1,29 @@
/**
 * Options bag accepted by `parse`.
 * Any string key may be supplied and the values are untyped.
 */
export interface ParseOptions {
[key: string]: any;
}
/**
 * Parses `input`, optionally configured through `opts`.
 * The result is declared as `any`, so callers get no static guarantees about its shape.
 */
export function parse(input: string, opts?: ParseOptions): any;
/**
 * A single position, described by line, column and offset.
 * NOTE(review): whether these are 0- or 1-based is not visible here — confirm against the generated parser.
 */
export interface Location {
line: number;
column: number;
// presumably the character offset from the start of the input — TODO confirm
offset: number;
}
/** A pair of positions; used as the type of `SyntaxError.location`. */
interface LocationRange {
start: Location,
end: Location
}
/**
 * Shape of the `SyntaxError` class exported by the parser module.
 * NOTE(review): presumably thrown by `parse` on invalid input — confirm against the generated parser.
 * Inside a module that imports it, this name shadows the built-in `SyntaxError`.
 */
export class SyntaxError {
line: number;
column: number;
offset: number;
location: LocationRange;
// Both `expected` and `found` are untyped.
expected:any[];
found:any;
name:string;
message:string;
}

513
src/parsegen/parser.pegjs Normal file
View file

@ -0,0 +1,513 @@
// PEG.js Grammar
// ==============
//
// PEG.js grammar syntax is designed to be simple, expressive, and similar to
// JavaScript where possible. This means that many rules, especially in the
// lexical part, are based on the grammar from ECMA-262, 5.1 Edition [1]. Some
// are directly taken or adapted from the JavaScript example grammar (see
// examples/javascript.pegjs).
//
// Limitations:
//
// * Non-BMP characters are completely ignored to avoid surrogate pair
// handling.
//
// * One can create identifiers containing illegal characters using Unicode
// escape sequences. For example, "abcd\u0020efgh" is not a valid
// identifier, but it is accepted by the parser.
//
// Both limitations could be resolved, but the costs would likely outweigh
// the benefits.
//
// [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm
// Grammar initializer: state and helpers used by the rule actions below.
{
// Used as a shorthand property name for `LabeledExpression`
const pick = true;
// Used by `LabelIdentifier` to disallow the use of certain words as labels
const RESERVED_WORDS = {};
// Populate `RESERVED_WORDS` using the optional option `reservedWords`
// (falls back to `util.reservedWords` when the option is not given)
const reservedWords = options.reservedWords || util.reservedWords;
if ( Array.isArray( reservedWords ) ) reservedWords.forEach( word => {
RESERVED_WORDS[ word ] = true;
} );
// Helper to construct a new AST Node
// The node is created for the current `location()`; when `details` is not
// null it is passed to `util.extend` together with the node.
function createNode( type, details ) {
const node = new ast.Node( type, location() );
if ( details === null ) return node;
util.extend( node, details );
return util.enforceFastProperties( node );
}
// Used by `addComment` to store comments for the Grammar AST
// (stays `null` unless the `extractComments` option is set)
const comments = options.extractComments ? {} : null;
// Helper that collects all the comments to pass to the Grammar AST
// Comments are keyed by the start offset of their location; the text is
// returned unchanged whether or not it was recorded.
function addComment( text, multiline ) {
if ( options.extractComments ) {
const loc = location();
comments[ loc.start.offset ] = {
text: text,
multiline: multiline,
location: loc,
};
}
return text;
}
}
// ---- Syntactic Grammar -----
// First rule of the grammar: an optional initializer followed by one or more
// rules, combined with the collected comments into an `ast.Grammar`.
Grammar
= __ initializer:(@Initializer __)? rules:(@Rule __)+ {
return new ast.Grammar( initializer, rules, comments, location() );
}
// A code block terminated by EOS; produces an "initializer" node holding the code.
Initializer
= code:CodeBlock EOS {
return createNode( "initializer", { code } );
}
// A rule definition: name, optional display name (a string literal), "=",
// and the rule's expression. When a display name is present the expression
// is first wrapped in a "named" node.
Rule
= name:Identifier __ displayName:(@StringLiteral __)? "=" __ expression:Expression EOS {
if ( displayName )
expression = createNode( "named", {
name: displayName,
expression: expression,
} );
return createNode( "rule", { name, expression } );
}
// Any expression; currently just delegates to ChoiceExpression.
Expression
= ChoiceExpression
// One or more action expressions separated by "/". A single alternative is
// returned as-is; several are collected into a "choice" node.
ChoiceExpression
= head:ActionExpression tail:(__ "/" __ @ActionExpression)* {
if ( tail.length === 0 ) return head;
return createNode( "choice", {
alternatives: [ head ].concat( tail ),
} );
}
// A sequence optionally followed by a code block. Without a code block the
// sequence is returned unchanged; otherwise an "action" node wraps both.
ActionExpression
= expression:SequenceExpression code:(__ @CodeBlock)? {
if ( code === null ) return expression;
return createNode( "action", { expression, code } );
}
// One or more labeled expressions in a row. A lone element is returned
// unwrapped, except when it is a "labeled" node with `pick` set, which is
// still wrapped in a "sequence" node like any multi-element sequence.
SequenceExpression
= head:LabeledExpression tail:(__ @LabeledExpression)* {
let elements = [ head ];
if ( tail.length === 0 ) {
if ( head.type !== "labeled" || ! head.pick ) return head;
} else {
elements = elements.concat( tail );
}
return createNode( "sequence", { elements } );
}
// An expression with an optional label. Alternatives are tried in this order:
//   1. "@" with an optional label: the node gets `pick: true` (via the `pick`
//      constant); `label` is null when no label follows the "@";
//   2. a plain `label:` prefix: the node has no `pick` property;
//   3. an unlabeled prefixed expression, passed through unchanged.
LabeledExpression
= "@" label:LabelIdentifier? __ expression:PrefixedExpression {
return createNode( "labeled", { pick, label, expression } );
}
/ label:LabelIdentifier __ expression:PrefixedExpression {
return createNode( "labeled", { label, expression } );
}
/ PrefixedExpression
// An identifier followed by ":". Yields the identifier text, or reports an
// error when the identifier is listed in `RESERVED_WORDS`.
LabelIdentifier
  = name:Identifier __ ":" {
      if ( RESERVED_WORDS[ name ] === true ) {
        error( `Label can't be a reserved word "${ name }".`, location() );
      } else {
        return name;
      }
    }
// A suffixed expression optionally preceded by one prefix operator. The
// operator rule yields the node type, so the node is created directly from it.
PrefixedExpression
= operator:PrefixedOperator __ expression:SuffixedExpression {
return createNode( operator, { expression } );
}
/ SuffixedExpression
// Maps each prefix operator character to the AST node type it produces.
PrefixedOperator
= "$" { return "text"; }
/ "&" { return "simple_and"; }
/ "!" { return "simple_not"; }
// A primary expression optionally followed by one suffix operator. The
// operator rule yields the node type, so the node is created directly from it.
SuffixedExpression
= expression:PrimaryExpression __ operator:SuffixedOperator {
return createNode( operator, { expression } );
}
/ PrimaryExpression
// Maps each suffix operator character to the AST node type it produces.
SuffixedOperator
= "?" { return "optional"; }
/ "*" { return "zero_or_more"; }
/ "+" { return "one_or_more"; }
// The atoms of the expression grammar: matchers, rule references, semantic
// predicates, and parenthesized sub-expressions.
PrimaryExpression
  = LiteralMatcher
  / CharacterClassMatcher
  / AnyMatcher
  / RuleReferenceExpression
  / SemanticPredicateExpression
  / "(" __ e:Expression __ ")" {
      // A "group" node exists only to isolate label scope, so it is added just
      // around "labeled" and "sequence" nodes. Every other node either cannot
      // contain labels or already isolates label scope on its own.
      const needsGroup = e.type === "labeled" || e.type === "sequence";
      return needsGroup ? createNode( "group", { expression: e } ) : e;
    }
// A reference to another rule by name. The negative lookahead rejects an
// identifier that is followed by an optional string literal and "=", which is
// the shape of the start of a `Rule` definition, so the name of the next rule
// is not consumed as a reference.
RuleReferenceExpression
= name:Identifier !(__ (StringLiteral __)? "=") {
return createNode( "rule_ref", { name } );
}
// A predicate operator followed by a code block (rather than an expression).
// The operator rule yields the node type; the node stores the block as `code`.
SemanticPredicateExpression
= operator:SemanticPredicateOperator __ code:CodeBlock {
return createNode( operator, { code } );
}
// Maps each predicate operator character to the AST node type it produces.
SemanticPredicateOperator
= "&" { return "semantic_and"; }
/ "!" { return "semantic_not"; }
// ---- Lexical Grammar -----
// Any single character of the input.
SourceCharacter
= .
// A single horizontal whitespace character: tab, vertical tab, form feed,
// space, no-break space, U+FEFF, or anything in the `Zs` class.
WhiteSpace "whitespace"
= "\t"
/ "\v"
/ "\f"
/ " "
/ "\u00A0"
/ "\uFEFF"
/ Zs
// A single line-terminating character; used in negative lookaheads.
LineTerminator
= [\n\r\u2028\u2029]
// One complete line break. "\r\n" is listed before "\r" so that a CRLF pair is
// consumed as a single break.
LineTerminatorSequence "end of line"
= "\n"
/ "\r\n"
/ "\r"
/ "\u2028"
/ "\u2029"
// Either comment form.
Comment "comment"
= MultiLineComment
/ SingleLineComment
// A `/* ... */` comment. The text between the delimiters is captured with `$`
// and passed to `addComment` with `multiline` set to true.
MultiLineComment
= "/*" comment:$(!"*/" SourceCharacter)* "*/" {
return addComment( comment, true );
}
// Same as `MultiLineComment`, but the body may not contain a line terminator.
MultiLineCommentNoLineTerminator
= "/*" comment:$(!("*/" / LineTerminator) SourceCharacter)* "*/" {
return addComment( comment, true );
}
// A `// ...` comment. The text up to (not including) the line terminator is
// passed to `addComment` with `multiline` set to false.
SingleLineComment
= "//" comment:$(!LineTerminator SourceCharacter)* {
return addComment( comment, false );
}
// An identifier: one start character followed by any number of part
// characters, returned as a single string.
Identifier "identifier"
= head:IdentifierStart tail:IdentifierPart* {
return head + tail.join("");
}
// Characters allowed at the start of an identifier. A backslash followed by a
// Unicode escape sequence yields the escape's result (the backslash is dropped
// by the `@` pick).
IdentifierStart
= UnicodeLetter
/ "$"
/ "_"
/ "\\" @UnicodeEscapeSequence
// Characters allowed after the first one: everything `IdentifierStart` accepts,
// plus combining marks, digits, connector punctuation, U+200C and U+200D.
IdentifierPart
= IdentifierStart
/ UnicodeCombiningMark
/ UnicodeDigit
/ UnicodeConnectorPunctuation
/ "\u200C"
/ "\u200D"
// Union of the letter-like character classes.
UnicodeLetter
= Lu
/ Ll
/ Lt
/ Lm
/ Lo
/ Nl
// Union of the nonspacing and spacing combining mark classes.
UnicodeCombiningMark
= Mn
/ Mc
// Alias for the decimal digit class.
UnicodeDigit
= Nd
// Alias for the connector punctuation class.
UnicodeConnectorPunctuation
= Pc
// A string literal optionally followed by `i`. `ignoreCase` is true exactly
// when the `i` flag is present.
LiteralMatcher "literal"
= value:StringLiteral ignoreCase:"i"? {
return createNode( "literal", {
value: value,
ignoreCase: ignoreCase !== null,
} );
}
// A double- or single-quoted string; yields the decoded characters joined into
// one string (without the quotes).
StringLiteral "string"
= '"' chars:DoubleStringCharacter* '"' { return chars.join(""); }
/ "'" chars:SingleStringCharacter* "'" { return chars.join(""); }
// One character inside a double-quoted string: any character except `"`,
// backslash or a line terminator; or a backslash escape; or a line continuation.
DoubleStringCharacter
= !('"' / "\\" / LineTerminator) @SourceCharacter
/ "\\" @EscapeSequence
/ LineContinuation
// One character inside a single-quoted string: any character except `'`,
// backslash or a line terminator; or a backslash escape; or a line continuation.
SingleStringCharacter
= !("'" / "\\" / LineTerminator) @SourceCharacter
/ "\\" @EscapeSequence
/ LineContinuation
// A bracketed character class such as `[a-z]`, `[^0-9]` or `[abc]i`.
// Empty-string parts (produced by `LineContinuation`) are filtered out;
// `inverted` / `ignoreCase` are true exactly when `^` / `i` are present.
CharacterClassMatcher "character class"
= "[" inverted:"^"? parts:CharacterPart* "]" ignoreCase:"i"? {
return createNode( "class", {
parts: parts.filter( part => part !== "" ),
inverted: inverted !== null,
ignoreCase: ignoreCase !== null,
} );
}
// One part of a class. The range form is tried first so that `a-z` is not
// read as three separate characters.
CharacterPart
= ClassCharacterRange
/ ClassCharacter
// A `begin-end` range, returned as a two-element array. Reports an error when
// the first code unit of `begin` is greater than that of `end`.
ClassCharacterRange
= begin:ClassCharacter "-" end:ClassCharacter {
if ( begin.charCodeAt( 0 ) > end.charCodeAt( 0 ) )
error( "Invalid character range: " + text() + "." );
return [ begin, end ];
}
// One character inside a class: any character except `]`, backslash or a line
// terminator; or a backslash escape; or a line continuation.
ClassCharacter
= !("]" / "\\" / LineTerminator) @SourceCharacter
/ "\\" @EscapeSequence
/ LineContinuation
// A backslash immediately followed by a line break; contributes no characters.
LineContinuation
= "\\" LineTerminatorSequence { return ""; }
// What may follow a backslash (the backslash itself is matched by the caller).
// `0` is the NUL escape only when it is not followed by another decimal digit.
EscapeSequence
= CharacterEscapeSequence
/ "0" !DecimalDigit { return "\0"; }
/ HexEscapeSequence
/ UnicodeEscapeSequence
// A one-character escape: either a recognised escape letter or any other
// character that stands for itself.
CharacterEscapeSequence
= SingleEscapeCharacter
/ NonEscapeCharacter
// The recognised one-character escapes. Quotes and backslash stand for
// themselves; the letters map to the corresponding control character.
SingleEscapeCharacter
= "'"
/ '"'
/ "\\"
/ "b" { return "\b"; }
/ "f" { return "\f"; }
/ "n" { return "\n"; }
/ "r" { return "\r"; }
/ "t" { return "\t"; }
/ "v" { return "\v"; }
// Any character that does not start another kind of escape and is not a line
// terminator; it is returned unchanged.
NonEscapeCharacter
= !(EscapeCharacter / LineTerminator) @SourceCharacter
// Characters that have a special meaning right after a backslash.
EscapeCharacter
= SingleEscapeCharacter
/ DecimalDigit
/ "x"
/ "u"
// `xHH`: two hex digits decoded into the character with that code.
HexEscapeSequence
= "x" digits:$(HexDigit HexDigit) {
return String.fromCharCode( parseInt( digits, 16 ) );
}
// `uHHHH`: four hex digits decoded into the UTF-16 code unit with that value.
UnicodeEscapeSequence
= "u" digits:$(HexDigit HexDigit HexDigit HexDigit) {
return String.fromCharCode( parseInt( digits, 16 ) );
}
// A single ASCII decimal digit.
DecimalDigit
= [0-9]
// A single hexadecimal digit, matched case-insensitively.
HexDigit
= [0-9a-f]i
// The `.` matcher. `createNode` is called without `details` here, so it
// receives `undefined` (not `null`) for that parameter.
AnyMatcher
= "." {
return createNode( "any" );
}
// A brace-delimited block of code; yields the text between the outer braces.
// If a "{" is seen but no balanced block follows, an error is reported.
CodeBlock "code block"
= "{" @Code "}"
/ "{" { error("Unbalanced brace."); }
// The raw text of a code block body: runs of non-brace characters and nested,
// balanced `{ ... }` groups, captured as one string by `$`.
Code
= $((![{}] SourceCharacter)+ / "{" Code "}")*
// Unicode Character Categories
//
// Extracted from the following Unicode Character Database file:
//
// http://www.unicode.org/Public/11.0.0/ucd/extracted/DerivedGeneralCategory.txt
//
// Unix magic used:
//
// grep "; $CATEGORY" DerivedGeneralCategory.txt | # Filter characters
// cut -f1 -d " " | # Extract code points
// grep -v '[0-9a-fA-F]\{5\}' | # Exclude non-BMP characters
// sed -e 's/\.\./-/' | # Adjust formatting
// sed -e 's/\([0-9a-fA-F]\{4\}\)/\\u\1/g' | # Adjust formatting
// tr -d '\n' # Join lines
//
// ECMA-262 allows using Unicode 3.0 or later; the data below was taken from
// version 11.0.0 (see the URL above).
//
// Non-BMP characters are completely ignored to avoid surrogate pair handling
// (detecting surrogate pairs isn't possible with a simple character class and
// other methods would degrade performance). I don't consider it a big deal as
// even parsers in JavaScript engines of common browsers seem to ignore them.
// Letter, Lowercase
Ll = [\u0061-\u007A\u00B5\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137-\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148-\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E-\u0180\u0183\u0185\u0188\u018C-\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA-\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9-\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC-\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF-\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F-\u0240\u0242\u0247\u0249\u024B\u024D\u024F-\u0293\u0295-\u02AF\u0371\u0373\u0377\u037B-\u037D\u0390\u03AC-\u03CE\u03D0-\u03D1\u03D5-\u03D7\u03D9\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F3\u03F5\u03F8\u03FB-\u03FC\u0430-\u045F\u0461\u0463\u0465\u0467\u0469\u046B\u046D\u046F\u0471\u0473\u0475\u0477\u0479\u047B\u047D\u047F\u0481\u048B\u048D\u048F\u0491\u0493\u0495\u0497\u0499\u049B\u049D\u049F\u04A1\u04A3\u04A5\u04A7\u04A9\u04AB\u04AD\u04AF\u04B1\u04B3\u04B5\u04B7\u04B9\u04BB\u04BD\u04BF\u04C2\u04C4\u04C6\u04C8\u04CA\u04CC\u04CE-\u04CF\u04D1\u04D3\u04D5\u04D7\u04D9\u04DB\u04DD\u04DF\u04E1\u04E3\u04E5\u04E7\u04E9\u04EB\u04ED\u04EF\u04F1\u04F3\u04F5\u04F7\u04F9\u04FB\u04FD\u04FF\u0501\u0503\u0505\u0507\u0509\u050B\u050D\u050F\u0511\u0513\u0515\u0517\u0519\u051B\u051D\u051F\u0521\u0523\u0525\u0527\u0529\u052B\u052D\u052F\u0560-\u0588\u10D0-\u10FA\u10FD-\u10FF\u13F8-\u13FD\u1C80-\u1C88\u1D00-\u1D2B\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E2
1\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFF-\u1F07\u1F10-\u1F15\u1F20-\u1F27\u1F30-\u1F37\u1F40-\u1F45\u1F50-\u1F57\u1F60-\u1F67\u1F70-\u1F7D\u1F80-\u1F87\u1F90-\u1F97\u1FA0-\u1FA7\u1FB0-\u1FB4\u1FB6-\u1FB7\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FC7\u1FD0-\u1FD3\u1FD6-\u1FD7\u1FE0-\u1FE7\u1FF2-\u1FF4\u1FF6-\u1FF7\u210A\u210E-\u210F\u2113\u212F\u2134\u2139\u213C-\u213D\u2146-\u2149\u214E\u2184\u2C30-\u2C5E\u2C61\u2C65-\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73-\u2C74\u2C76-\u2C7B\u2C81\u2C83\u2C85\u2C87\u2C89\u2C8B\u2C8D\u2C8F\u2C91\u2C93\u2C95\u2C97\u2C99\u2C9B\u2C9D\u2C9F\u2CA1\u2CA3\u2CA5\u2CA7\u2CA9\u2CAB\u2CAD\u2CAF\u2CB1\u2CB3\u2CB5\u2CB7\u2CB9\u2CBB\u2CBD\u2CBF\u2CC1\u2CC3\u2CC5\u2CC7\u2CC9\u2CCB\u2CCD\u2CCF\u2CD1\u2CD3\u2CD5\u2CD7\u2CD9\u2CDB\u2CDD\u2CDF\u2CE1\u2CE3-\u2CE4\u2CEC\u2CEE\u2CF3\u2D00-\u2D25\u2D27\u2D2D\uA641\uA643\uA645\uA647\uA649\uA64B\uA64D\uA64F\uA651\uA653\uA655\uA657\uA659\uA65B\uA65D\uA65F\uA661\uA663\uA665\uA667\uA669\uA66B\uA66D\uA681\uA683\uA685\uA687\uA689\uA68B\uA68D\uA68F\uA691\uA693\uA695\uA697\uA699\uA69B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7
A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB65\uAB70-\uABBF\uFB00-\uFB06\uFB13-\uFB17\uFF41-\uFF5A]
// Letter, Modifier
Lm = [\u02B0-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0374\u037A\u0559\u0640\u06E5-\u06E6\u07F4-\u07F5\u07FA\u081A\u0824\u0828\u0971\u0E46\u0EC6\u10FC\u17D7\u1843\u1AA7\u1C78-\u1C7D\u1D2C-\u1D6A\u1D78\u1D9B-\u1DBF\u2071\u207F\u2090-\u209C\u2C7C-\u2C7D\u2D6F\u2E2F\u3005\u3031-\u3035\u303B\u309D-\u309E\u30FC-\u30FE\uA015\uA4F8-\uA4FD\uA60C\uA67F\uA69C-\uA69D\uA717-\uA71F\uA770\uA788\uA7F8-\uA7F9\uA9CF\uA9E6\uAA70\uAADD\uAAF3-\uAAF4\uAB5C-\uAB5F\uFF70\uFF9E-\uFF9F]
// Letter, Other
Lo = [\u00AA\u00BA\u01BB\u01C0-\u01C3\u0294\u05D0-\u05EA\u05EF-\u05F2\u0620-\u063F\u0641-\u064A\u066E-\u066F\u0671-\u06D3\u06D5\u06EE-\u06EF\u06FA-\u06FC\u06FF\u0710\u0712-\u072F\u074D-\u07A5\u07B1\u07CA-\u07EA\u0800-\u0815\u0840-\u0858\u0860-\u086A\u08A0-\u08B4\u08B6-\u08BD\u0904-\u0939\u093D\u0950\u0958-\u0961\u0972-\u0980\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BD\u09CE\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u09FC\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AD0\u0AE0-\u0AE1\u0AF9\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B71\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BD0\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3D\u0C58-\u0C5A\u0C60-\u0C61\u0C80\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBD\u0CDE\u0CE0-\u0CE1\u0CF1-\u0CF2\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D\u0D4E\u0D54-\u0D56\u0D5F-\u0D61\u0D7A-\u0D7F\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0E01-\u0E30\u0E32-\u0E33\u0E40-\u0E45\u0E81-\u0E82\u0E84\u0E87-\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA-\u0EAB\u0EAD-\u0EB0\u0EB2-\u0EB3\u0EBD\u0EC0-\u0EC4\u0EDC-\u0EDF\u0F00\u0F40-\u0F47\u0F49-\u0F6C\u0F88-\u0F8C\u1000-\u102A\u103F\u1050-\u1055\u105A-\u105D\u1061\u1065-\u1066\u106E-\u1070\u1075-\u1081\u108E\u1100-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u1380-\u138F\u1401-\u166C\u166F-\u167F\u1681-\u169A\u16A0-\u16EA\u16F1-\u16F8\u1700-\u170C\u170E-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176C\u176E-\u1770\u1780-\u17B3\u17DC\u1820-\u1842\u1844-
\u1878\u1880-\u1884\u1887-\u18A8\u18AA\u18B0-\u18F5\u1900-\u191E\u1950-\u196D\u1970-\u1974\u1980-\u19AB\u19B0-\u19C9\u1A00-\u1A16\u1A20-\u1A54\u1B05-\u1B33\u1B45-\u1B4B\u1B83-\u1BA0\u1BAE-\u1BAF\u1BBA-\u1BE5\u1C00-\u1C23\u1C4D-\u1C4F\u1C5A-\u1C77\u1CE9-\u1CEC\u1CEE-\u1CF1\u1CF5-\u1CF6\u2135-\u2138\u2D30-\u2D67\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\u3006\u303C\u3041-\u3096\u309F\u30A1-\u30FA\u30FF\u3105-\u312F\u3131-\u318E\u31A0-\u31BA\u31F0-\u31FF\u3400-\u4DB5\u4E00-\u9FEF\uA000-\uA014\uA016-\uA48C\uA4D0-\uA4F7\uA500-\uA60B\uA610-\uA61F\uA62A-\uA62B\uA66E\uA6A0-\uA6E5\uA78F\uA7F7\uA7FB-\uA801\uA803-\uA805\uA807-\uA80A\uA80C-\uA822\uA840-\uA873\uA882-\uA8B3\uA8F2-\uA8F7\uA8FB\uA8FD-\uA8FE\uA90A-\uA925\uA930-\uA946\uA960-\uA97C\uA984-\uA9B2\uA9E0-\uA9E4\uA9E7-\uA9EF\uA9FA-\uA9FE\uAA00-\uAA28\uAA40-\uAA42\uAA44-\uAA4B\uAA60-\uAA6F\uAA71-\uAA76\uAA7A\uAA7E-\uAAAF\uAAB1\uAAB5-\uAAB6\uAAB9-\uAABD\uAAC0\uAAC2\uAADB-\uAADC\uAAE0-\uAAEA\uAAF2\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E\uABC0-\uABE2\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uF900-\uFA6D\uFA70-\uFAD9\uFB1D\uFB1F-\uFB28\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE70-\uFE74\uFE76-\uFEFC\uFF66-\uFF6F\uFF71-\uFF9D\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]
// Letter, Titlecase
Lt = [\u01C5\u01C8\u01CB\u01F2\u1F88-\u1F8F\u1F98-\u1F9F\u1FA8-\u1FAF\u1FBC\u1FCC\u1FFC]
// Letter, Uppercase
Lu = [\u0041-\u005A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178-\u0179\u017B\u017D\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018B\u018E-\u0191\u0193-\u0194\u0196-\u0198\u019C-\u019D\u019F-\u01A0\u01A2\u01A4\u01A6-\u01A7\u01A9\u01AC\u01AE-\u01AF\u01B1-\u01B3\u01B5\u01B7-\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A-\u023B\u023D-\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u0370\u0372\u0376\u037F\u0386\u0388-\u038A\u038C\u038E-\u038F\u0391-\u03A1\u03A3-\u03AB\u03CF\u03D2-\u03D4\u03D8\u03DA\u03DC\u03DE\u03E0\u03E2\u03E4\u03E6\u03E8\u03EA\u03EC\u03EE\u03F4\u03F7\u03F9-\u03FA\u03FD-\u042F\u0460\u0462\u0464\u0466\u0468\u046A\u046C\u046E\u0470\u0472\u0474\u0476\u0478\u047A\u047C\u047E\u0480\u048A\u048C\u048E\u0490\u0492\u0494\u0496\u0498\u049A\u049C\u049E\u04A0\u04A2\u04A4\u04A6\u04A8\u04AA\u04AC\u04AE\u04B0\u04B2\u04B4\u04B6\u04B8\u04BA\u04BC\u04BE\u04C0-\u04C1\u04C3\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04D2\u04D4\u04D6\u04D8\u04DA\u04DC\u04DE\u04E0\u04E2\u04E4\u04E6\u04E8\u04EA\u04EC\u04EE\u04F0\u04F2\u04F4\u04F6\u04F8\u04FA\u04FC\u04FE\u0500\u0502\u0504\u0506\u0508\u050A\u050C\u050E\u0510\u0512\u0514\u0516\u0518\u051A\u051C\u051E\u0520\u0522\u0524\u0526\u0528\u052A\u052C\u052E\u0531-\u0556\u10A0-\u10C5\u10C7\u10CD\u13A0-\u13F5\u1C90-\u1CBA\u1CBD-\u1CBF\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1
E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFE\u1F08-\u1F0F\u1F18-\u1F1D\u1F28-\u1F2F\u1F38-\u1F3F\u1F48-\u1F4D\u1F59\u1F5B\u1F5D\u1F5F\u1F68-\u1F6F\u1FB8-\u1FBB\u1FC8-\u1FCB\u1FD8-\u1FDB\u1FE8-\u1FEC\u1FF8-\u1FFB\u2102\u2107\u210B-\u210D\u2110-\u2112\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u2130-\u2133\u213E-\u213F\u2145\u2183\u2C00-\u2C2E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E-\u2C80\u2C82\u2C84\u2C86\u2C88\u2C8A\u2C8C\u2C8E\u2C90\u2C92\u2C94\u2C96\u2C98\u2C9A\u2C9C\u2C9E\u2CA0\u2CA2\u2CA4\u2CA6\u2CA8\u2CAA\u2CAC\u2CAE\u2CB0\u2CB2\u2CB4\u2CB6\u2CB8\u2CBA\u2CBC\u2CBE\u2CC0\u2CC2\u2CC4\u2CC6\u2CC8\u2CCA\u2CCC\u2CCE\u2CD0\u2CD2\u2CD4\u2CD6\u2CD8\u2CDA\u2CDC\u2CDE\u2CE0\u2CE2\u2CEB\u2CED\u2CF2\uA640\uA642\uA644\uA646\uA648\uA64A\uA64C\uA64E\uA650\uA652\uA654\uA656\uA658\uA65A\uA65C\uA65E\uA660\uA662\uA664\uA666\uA668\uA66A\uA66C\uA680\uA682\uA684\uA686\uA688\uA68A\uA68C\uA68E\uA690\uA692\uA694\uA696\uA698\uA69A\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D-\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\uFF21-\uFF3A]
// Mark, Spacing Combining
Mc = [\u0903\u093B\u093E-\u0940\u0949-\u094C\u094E-\u094F\u0982-\u0983\u09BE-\u09C0\u09C7-\u09C8\u09CB-\u09CC\u09D7\u0A03\u0A3E-\u0A40\u0A83\u0ABE-\u0AC0\u0AC9\u0ACB-\u0ACC\u0B02-\u0B03\u0B3E\u0B40\u0B47-\u0B48\u0B4B-\u0B4C\u0B57\u0BBE-\u0BBF\u0BC1-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCC\u0BD7\u0C01-\u0C03\u0C41-\u0C44\u0C82-\u0C83\u0CBE\u0CC0-\u0CC4\u0CC7-\u0CC8\u0CCA-\u0CCB\u0CD5-\u0CD6\u0D02-\u0D03\u0D3E-\u0D40\u0D46-\u0D48\u0D4A-\u0D4C\u0D57\u0D82-\u0D83\u0DCF-\u0DD1\u0DD8-\u0DDF\u0DF2-\u0DF3\u0F3E-\u0F3F\u0F7F\u102B-\u102C\u1031\u1038\u103B-\u103C\u1056-\u1057\u1062-\u1064\u1067-\u106D\u1083-\u1084\u1087-\u108C\u108F\u109A-\u109C\u17B6\u17BE-\u17C5\u17C7-\u17C8\u1923-\u1926\u1929-\u192B\u1930-\u1931\u1933-\u1938\u1A19-\u1A1A\u1A55\u1A57\u1A61\u1A63-\u1A64\u1A6D-\u1A72\u1B04\u1B35\u1B3B\u1B3D-\u1B41\u1B43-\u1B44\u1B82\u1BA1\u1BA6-\u1BA7\u1BAA\u1BE7\u1BEA-\u1BEC\u1BEE\u1BF2-\u1BF3\u1C24-\u1C2B\u1C34-\u1C35\u1CE1\u1CF2-\u1CF3\u1CF7\u302E-\u302F\uA823-\uA824\uA827\uA880-\uA881\uA8B4-\uA8C3\uA952-\uA953\uA983\uA9B4-\uA9B5\uA9BA-\uA9BB\uA9BD-\uA9C0\uAA2F-\uAA30\uAA33-\uAA34\uAA4D\uAA7B\uAA7D\uAAEB\uAAEE-\uAAEF\uAAF5\uABE3-\uABE4\uABE6-\uABE7\uABE9-\uABEA\uABEC]
// Mark, Nonspacing
Mn = [\u0300-\u036F\u0483-\u0487\u0591-\u05BD\u05BF\u05C1-\u05C2\u05C4-\u05C5\u05C7\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7-\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u07EB-\u07F3\u07FD\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u08D3-\u08E1\u08E3-\u0902\u093A\u093C\u0941-\u0948\u094D\u0951-\u0957\u0962-\u0963\u0981\u09BC\u09C1-\u09C4\u09CD\u09E2-\u09E3\u09FE\u0A01-\u0A02\u0A3C\u0A41-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A51\u0A70-\u0A71\u0A75\u0A81-\u0A82\u0ABC\u0AC1-\u0AC5\u0AC7-\u0AC8\u0ACD\u0AE2-\u0AE3\u0AFA-\u0AFF\u0B01\u0B3C\u0B3F\u0B41-\u0B44\u0B4D\u0B56\u0B62-\u0B63\u0B82\u0BC0\u0BCD\u0C00\u0C04\u0C3E-\u0C40\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C62-\u0C63\u0C81\u0CBC\u0CBF\u0CC6\u0CCC-\u0CCD\u0CE2-\u0CE3\u0D00-\u0D01\u0D3B-\u0D3C\u0D41-\u0D44\u0D4D\u0D62-\u0D63\u0DCA\u0DD2-\u0DD4\u0DD6\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC\u0EC8-\u0ECD\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F71-\u0F7E\u0F80-\u0F84\u0F86-\u0F87\u0F8D-\u0F97\u0F99-\u0FBC\u0FC6\u102D-\u1030\u1032-\u1037\u1039-\u103A\u103D-\u103E\u1058-\u1059\u105E-\u1060\u1071-\u1074\u1082\u1085-\u1086\u108D\u109D\u135D-\u135F\u1712-\u1714\u1732-\u1734\u1752-\u1753\u1772-\u1773\u17B4-\u17B5\u17B7-\u17BD\u17C6\u17C9-\u17D3\u17DD\u180B-\u180D\u1885-\u1886\u18A9\u1920-\u1922\u1927-\u1928\u1932\u1939-\u193B\u1A17-\u1A18\u1A1B\u1A56\u1A58-\u1A5E\u1A60\u1A62\u1A65-\u1A6C\u1A73-\u1A7C\u1A7F\u1AB0-\u1ABD\u1B00-\u1B03\u1B34\u1B36-\u1B3A\u1B3C\u1B42\u1B6B-\u1B73\u1B80-\u1B81\u1BA2-\u1BA5\u1BA8-\u1BA9\u1BAB-\u1BAD\u1BE6\u1BE8-\u1BE9\u1BED\u1BEF-\u1BF1\u1C2C-\u1C33\u1C36-\u1C37\u1CD0-\u1CD2\u1CD4-\u1CE0\u1CE2-\u1CE8\u1CED\u1CF4\u1CF8-\u1CF9\u1DC0-\u1DF9\u1DFB-\u1DFF\u20D0-\u20DC\u20E1\u20E5-\u20F0\u2CEF-\u2CF1\u2D7F\u2DE0-\u2DFF\u302A-\u302D\u3099-\u309A\uA66F\uA674-\uA67D\uA69E-\uA69F\uA6F0-\uA6F1\uA802\uA806\uA80B\uA825-\uA826\uA8C4-\uA8C5\uA8E0-\uA8F1\uA8FF\uA926-\uA92D\uA947-\uA951\uA980-\uA982\uA9B3\uA9B6-\uA9B9\uA9BC\uA9E5\uAA29-\uAA2E\uAA31-
\uAA32\uAA35-\uAA36\uAA43\uAA4C\uAA7C\uAAB0\uAAB2-\uAAB4\uAAB7-\uAAB8\uAABE-\uAABF\uAAC1\uAAEC-\uAAED\uAAF6\uABE5\uABE8\uABED\uFB1E\uFE00-\uFE0F\uFE20-\uFE2F]
// Number, Decimal Digit
Nd = [\u0030-\u0039\u0660-\u0669\u06F0-\u06F9\u07C0-\u07C9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE6-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0DE6-\u0DEF\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29\u1040-\u1049\u1090-\u1099\u17E0-\u17E9\u1810-\u1819\u1946-\u194F\u19D0-\u19D9\u1A80-\u1A89\u1A90-\u1A99\u1B50-\u1B59\u1BB0-\u1BB9\u1C40-\u1C49\u1C50-\u1C59\uA620-\uA629\uA8D0-\uA8D9\uA900-\uA909\uA9D0-\uA9D9\uA9F0-\uA9F9\uAA50-\uAA59\uABF0-\uABF9\uFF10-\uFF19]
// Number, Letter
Nl = [\u16EE-\u16F0\u2160-\u2182\u2185-\u2188\u3007\u3021-\u3029\u3038-\u303A\uA6E6-\uA6EF]
// Punctuation, Connector
Pc = [\u005F\u203F-\u2040\u2054\uFE33-\uFE34\uFE4D-\uFE4F\uFF3F]
// Separator, Space
Zs = [\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]
// Skipped
// Any amount of whitespace, line breaks and comments (may span lines).
__
= (WhiteSpace / LineTerminatorSequence / Comment)*
// Any amount of whitespace and multi-line comments that contain no line break,
// so the match stays on the current line.
_
= (WhiteSpace / MultiLineCommentNoLineTerminator)*
// Automatic Semicolon Insertion
// End of statement, tried in this order: an explicit ";" (possibly on a later
// line), the end of the current line (optionally after a single-line comment),
// or the end of input.
EOS
= __ ";"
/ _ SingleLineComment? LineTerminatorSequence
/ __ EOF
// End of input: succeeds only when no character is left.
EOF
= !.

View file

@ -2,80 +2,90 @@
import * as acorn from "acorn"
import {
Syntax,
Token,
FuncDecl,
Identifier,
SyntaxKind,
TokenStream,
RetStmt,
VarDecl,
Stmt,
Patt,
Expr,
BindPatt,
Param,
RefExpr,
TypeRef,
TypeDecl,
ConstExpr,
QualName,
CallExpr,
ImportDecl,
SourceElement,
Module,
RecordDecl,
NewTypeDecl,
BoltToken,
BoltIdentifier,
createBoltFuncDecl,
createBoltIdentifier,
createBoltSyntaxKind,
createBoltTokenStream,
createBoltRetStmt,
createBoltVarDecl,
createBoltStmt,
createBoltPatt,
createBoltExpr,
createBoltBindPatt,
createBoltParam,
createBoltRefExpr,
createBoltTypeRef,
createBoltTypeDecl,
createBoltConstExpr,
createBoltQualName,
createBoltCallExpr,
createBoltImportDecl,
createBoltSourceElement,
createBoltModule,
createBoltRecordDecl,
createBoltNewTypeDecl,
BoltQualName,
BoltPattern,
createBoltBindPattern,
BoltImportDeclaration,
BoltTypeNode,
createBoltReferenceTypeNode,
createJSReferenceExpression,
createBoltReferenceExpression,
} from "./ast"
import { stringType, intType } from "./checker"
import { PrimValue } from "./evaluator"
import {BoltTokenStream} from "./util"
/**
 * Returns a human-readable description of a syntax kind (e.g. "an identifier",
 * "';'"), and throws an `Error` for kinds that have no description.
 *
 * NOTE(review): this region is a rendered diff with the pre- and post-change
 * lines interleaved, so every `case` (and the final `throw`) appears twice:
 * first with the old `SyntaxKind.X` name, then with the new `SyntaxKind.BoltX`.
 */
function describeKind(kind: SyntaxKind): string {
switch (kind) {
case SyntaxKind.Identifier:
case SyntaxKind.BoltIdentifier:
return "an identifier"
case SyntaxKind.Operator:
case SyntaxKind.BoltOperator:
return "an operator"
case SyntaxKind.StringLiteral:
case SyntaxKind.BoltStringLiteral:
return "a string"
case SyntaxKind.IntegerLiteral:
case SyntaxKind.BoltIntegerLiteral:
return "an integer"
case SyntaxKind.FnKeyword:
case SyntaxKind.BoltFnKeyword:
return "'fn'"
case SyntaxKind.ForeignKeyword:
case SyntaxKind.BoltForeignKeyword:
return "'foreign'"
case SyntaxKind.PubKeyword:
case SyntaxKind.BoltPubKeyword:
return "'pub'"
case SyntaxKind.LetKeyword:
case SyntaxKind.BoltLetKeyword:
return "'let'"
case SyntaxKind.Semi:
case SyntaxKind.BoltSemi:
return "';'"
case SyntaxKind.Colon:
case SyntaxKind.BoltColon:
return "':'"
case SyntaxKind.Dot:
case SyntaxKind.BoltDot:
return "'.'"
case SyntaxKind.RArrow:
case SyntaxKind.BoltRArrow:
return "'->'"
case SyntaxKind.Comma:
case SyntaxKind.BoltComma:
return "','"
case SyntaxKind.ModKeyword:
case SyntaxKind.BoltModKeyword:
return "'mod'"
case SyntaxKind.StructKeyword:
case SyntaxKind.BoltStructKeyword:
return "'struct'"
case SyntaxKind.EnumKeyword:
case SyntaxKind.BoltEnumKeyword:
return "'enum'"
case SyntaxKind.Braced:
case SyntaxKind.BoltBraced:
return "'{' .. '}'"
case SyntaxKind.Bracketed:
case SyntaxKind.BoltBracketed:
return "'[' .. ']'"
case SyntaxKind.Parenthesized:
case SyntaxKind.BoltParenthesized:
return "'(' .. ')'"
case SyntaxKind.EOS:
case SyntaxKind.BoltEOS:
return "'}', ')', ']' or end-of-file"
default:
// NOTE(review): the new line calls `kindToString`, which does not appear in
// the import list visible above - confirm it is in scope.
throw new Error(`failed to describe ${SyntaxKind[kind]}`)
throw new Error(`failed to describe ${kindToString(kind)}`)
}
}
@ -140,76 +150,76 @@ export class Parser {
];
/**
 * Parses a possibly dotted name such as `a.b.c`: leading `ident .` pairs are
 * collected into `path`, and the final token must be an identifier.
 *
 * Throws `ParseError` when the final token is not an identifier.
 *
 * NOTE(review): rendered diff - each changed statement appears twice (old line
 * first, new line second).
 */
parseQualName(tokens: TokenStream): QualName {
parseQualName(tokens: BoltTokenStream): BoltQualName {
const path: Identifier[] = [];
const path: BoltIdentifier[] = [];
while (true) {
// Look at the token after the next one (presumably `peek(2)` is 1-based -
// confirm); only continue while it is a dot.
const t0 = tokens.peek(2);
if (t0.kind !== SyntaxKind.Dot) {
if (t0.kind !== SyntaxKind.BoltDot) {
break;
}
// The path segment is cast to an identifier, not checked.
path.push(tokens.get() as Identifier)
path.push(tokens.get() as BoltIdentifier)
// Consume the dot seen by the lookahead.
tokens.get();
}
const name = tokens.get();
if (name.kind !== SyntaxKind.Identifier) {
throw new ParseError(name, [SyntaxKind.Identifier]);
if (name.kind !== SyntaxKind.BoltIdentifier) {
throw new ParseError(name, [SyntaxKind.BoltIdentifier]);
}
// The node spans from the first path segment (or the name itself) to the name.
const startNode = path.length > 0 ? path[0] : name;
const endNode = name;
return new QualName(name, path, null, [startNode, endNode]);
return createBoltQualName(path, name, null, [startNode, endNode]);
}
/**
 * Parses a pattern. Only a single identifier (a bind pattern) is accepted; any
 * other token raises a `ParseError` expecting an identifier.
 *
 * NOTE(review): rendered diff - each changed statement appears twice (old line
 * first, new line second).
 */
parsePattern(tokens: TokenStream): Patt {
parsePattern(tokens: BoltTokenStream): BoltPattern {
const t0 = tokens.peek(1);
if (t0.kind === SyntaxKind.Identifier) {
if (t0.kind === SyntaxKind.BoltIdentifier) {
// Consume the identifier that was just peeked.
tokens.get();
return new BindPatt(t0, null, t0)
return createBoltBindPattern(t0.text, null, [t0, t0])
} else {
throw new ParseError(t0, [SyntaxKind.Identifier])
throw new ParseError(t0, [SyntaxKind.BoltIdentifier])
}
}
/**
 * Parses an import declaration: skips the first token without inspecting it,
 * then requires a string literal whose `value` becomes the import.
 *
 * Throws `ParseError` when the second token is not a string literal.
 *
 * NOTE(review): rendered diff - each changed statement appears twice (old line
 * first, new line second). The new line still passes the bare token `t0` as
 * the last argument, while the sibling factories above were switched to
 * `[start, end]` pairs - confirm which form `createBoltImportDecl` expects.
 */
parseImportDecl(tokens: TokenStream): ImportDecl {
parseImportDecl(tokens: BoltTokenStream): BoltImportDeclaration {
// Assuming first keyword is 'import'
tokens.get();
const t0 = tokens.get();
if (t0.kind !== SyntaxKind.StringLiteral) {
throw new ParseError(t0, [SyntaxKind.StringLiteral])
if (t0.kind !== SyntaxKind.BoltStringLiteral) {
throw new ParseError(t0, [SyntaxKind.BoltStringLiteral])
}
return new ImportDecl(t0.value, null, t0);
return createBoltImportDecl(t0.value, null, t0);
}
/**
 * Parses a type reference: when the next token is an identifier, a qualified
 * name is parsed and wrapped in a reference type node with an empty argument
 * list; any other token raises a `ParseError` expecting an identifier.
 *
 * NOTE(review): rendered diff - each changed statement appears twice (old line
 * first, new line second). The new line reads `name.origNodes`, whereas the
 * new line in `parsePrimExpr` reads `name.origNode` - confirm which property
 * exists on the qualified-name node.
 */
parseTypeDecl(tokens: TokenStream): TypeDecl {
parseTypeDecl(tokens: BoltTokenStream): BoltTypeNode {
const t0 = tokens.peek();
if (t0.kind === SyntaxKind.Identifier) {
if (t0.kind === SyntaxKind.BoltIdentifier) {
const name = this.parseQualName(tokens)
return new TypeRef(name, [], null, name.origNode)
return createBoltReferenceTypeNode(name, [], null, name.origNodes)
} else {
throw new ParseError(t0, [SyntaxKind.Identifier]);
throw new ParseError(t0, [SyntaxKind.BoltIdentifier]);
}
}
/**
 * Parses a primary expression: a string literal or integer literal (both
 * become constant expressions) or an identifier (parsed as a qualified name
 * and wrapped in a reference expression).
 *
 * Throws `ParseError` for any other token.
 *
 * NOTE(review): rendered diff - each changed statement appears twice (old line
 * first, new line second). The signature and the two `ConstExpr` returns have
 * no post-change counterpart here, so they still use the old names.
 */
parsePrimExpr(tokens: TokenStream): Expr {
const t0 = tokens.peek();
if (t0.kind === SyntaxKind.StringLiteral) {
if (t0.kind === SyntaxKind.BoltStringLiteral) {
tokens.get();
return new ConstExpr(new PrimValue(stringType, t0.value), null, t0);
} else if (t0.kind === SyntaxKind.IntegerLiteral) {
} else if (t0.kind === SyntaxKind.BoltIntegerLiteral) {
tokens.get();
return new ConstExpr(new PrimValue(intType, t0.value), null, t0);
} else if (t0.kind === SyntaxKind.Identifier) {
} else if (t0.kind === SyntaxKind.BoltIdentifier) {
const name = this.parseQualName(tokens);
return new RefExpr(name, null, name.origNode);
return createBoltReferenceExpression(name, null, name.origNode);
} else {
// NOTE(review): integer literals are accepted above but are not listed among
// the expected kinds reported here.
throw new ParseError(t0, [SyntaxKind.StringLiteral, SyntaxKind.Identifier]);
throw new ParseError(t0, [SyntaxKind.BoltStringLiteral, SyntaxKind.BoltIdentifier]);
}
}
@ -219,8 +229,8 @@ export class Parser {
tokens.get();
const t1 = tokens.get();
if (t1.kind !== SyntaxKind.Braced) {
throw new ParseError(t1, [SyntaxKind.Braced])
if (t1.kind !== SyntaxKind.BoltBraced) {
throw new ParseError(t1, [SyntaxKind.BoltBraced])
}
const innerTokens = t1.toTokenStream();
@ -228,8 +238,8 @@ export class Parser {
const pattern = this.parsePattern(innerTokens)
const t2 = innerTokens.get();
if (t2.kind !== SyntaxKind.RArrow) {
throw new ParseError(t2, [SyntaxKind.RArrow]);
if (t2.kind !== SyntaxKind.BoltRArrow) {
throw new ParseError(t2, [SyntaxKind.BoltRArrow]);
}
const body = this.parseBody(innerTokens);
@ -250,17 +260,17 @@ export class Parser {
const pattern = this.parsePattern(tokens)
const t0 = tokens.peek(1);
if (t0.kind === SyntaxKind.Colon) {
if (t0.kind === SyntaxKind.BoltColon) {
tokens.get();
typeDecl = this.parseTypeDecl(tokens);
const t1 = tokens.peek(1);
if (t1.kind === SyntaxKind.EqSign) {
if (t1.kind === SyntaxKind.BoltEqSign) {
tokens.get();
defaultValue = this.parseExpr(tokens);
}
}
if (t0.kind === SyntaxKind.EqSign) {
if (t0.kind === SyntaxKind.BoltEqSign) {
tokens.get();
defaultValue = this.parseExpr(tokens);
}
@ -279,7 +289,7 @@ export class Parser {
tokens.get();
const t0 = tokens.peek();
if (t0.kind === SyntaxKind.Identifier && t0.text === 'mut') {
if (t0.kind === SyntaxKind.BoltIdentifier && t0.text === 'mut') {
tokens.get();
isMutable = true;
}
@ -287,13 +297,13 @@ export class Parser {
const bindings = this.parsePattern(tokens)
const t1 = tokens.peek();
if (t1.kind === SyntaxKind.Colon) {
if (t1.kind === SyntaxKind.BoltColon) {
tokens.get();
typeDecl = this.parseTypeDecl(tokens);
}
const t2 = tokens.peek();
if (t2.kind === SyntaxKind.EqSign) {
if (t2.kind === SyntaxKind.BoltEqSign) {
tokens.get();
value = this.parseExpr(tokens);
}
@ -310,7 +320,7 @@ export class Parser {
let expr = null;
const t1 = tokens.peek();
if (t1.kind !== SyntaxKind.EOS) {
if (t1.kind !== SyntaxKind.BoltEOS) {
expr = this.parseExpr(tokens)
}
@ -326,24 +336,24 @@ export class Parser {
let isPublic = false;
let kw = tokens.get();
if (kw.kind !== SyntaxKind.Identifier) {
throw new ParseError(kw, [SyntaxKind.PubKeyword, SyntaxKind.StructKeyword]);
if (kw.kind !== SyntaxKind.BoltIdentifier) {
throw new ParseError(kw, [SyntaxKind.BoltPubKeyword, SyntaxKind.BoltStructKeyword]);
}
if (kw.text === 'pub') {
isPublic = true;
kw = tokens.get();
}
if (kw.kind !== SyntaxKind.Identifier || kw.text !== 'struct') {
throw new ParseError(kw, [SyntaxKind.StructKeyword])
if (kw.kind !== SyntaxKind.BoltIdentifier || kw.text !== 'struct') {
throw new ParseError(kw, [SyntaxKind.BoltStructKeyword])
}
const name = this.parseQualName(tokens);
const t2 = tokens.get();
if (t2.kind !== SyntaxKind.Braced) {
throw new ParseError(kw, [SyntaxKind.Braced])
if (t2.kind !== SyntaxKind.BoltBraced) {
throw new ParseError(kw, [SyntaxKind.BoltBraced])
}
let fields = [];
@ -362,23 +372,23 @@ export class Parser {
let isPublic = false;
let kw = tokens.get();
if (kw.kind !== SyntaxKind.Identifier) {
throw new ParseError(kw, [SyntaxKind.PubKeyword, SyntaxKind.ModKeyword]);
if (kw.kind !== SyntaxKind.BoltIdentifier) {
throw new ParseError(kw, [SyntaxKind.BoltPubKeyword, SyntaxKind.BoltModKeyword]);
}
if (kw.text === 'pub') {
isPublic = true;
kw = tokens.get();
}
if (kw.kind !== SyntaxKind.Identifier || kw.text !== 'mod') {
throw new ParseError(kw, [SyntaxKind.ModKeyword])
if (kw.kind !== SyntaxKind.BoltIdentifier || kw.text !== 'mod') {
throw new ParseError(kw, [SyntaxKind.BoltModKeyword])
}
const name = this.parseQualName(tokens);
const t1 = tokens.get();
if (t1.kind !== SyntaxKind.Braced) {
throw new ParseError(t1, [SyntaxKind.Braced])
if (t1.kind !== SyntaxKind.BoltBraced) {
throw new ParseError(t1, [SyntaxKind.BoltBraced])
}
return new Module(isPublic, name, t1.toSentences());
@ -387,8 +397,8 @@ export class Parser {
protected assertEmpty(tokens: TokenStream) {
const t0 = tokens.peek(1);
if (t0.kind !== SyntaxKind.EOS) {
throw new ParseError(t0, [SyntaxKind.EOS]);
if (t0.kind !== SyntaxKind.BoltEOS) {
throw new ParseError(t0, [SyntaxKind.BoltEOS]);
}
}
@ -396,24 +406,24 @@ export class Parser {
let isPublic = false;
let t0 = tokens.get();
if (t0.kind !== SyntaxKind.Identifier) {
throw new ParseError(t0, [SyntaxKind.PubKeyword, SyntaxKind.NewTypeKeyword])
if (t0.kind !== SyntaxKind.BoltIdentifier) {
throw new ParseError(t0, [SyntaxKind.BoltPubKeyword, SyntaxKind.BoltNewTypeKeyword])
}
if (t0.text === 'pub') {
isPublic = true;
t0 = tokens.get();
if (t0.kind !== SyntaxKind.Identifier) {
throw new ParseError(t0, [SyntaxKind.NewTypeKeyword])
if (t0.kind !== SyntaxKind.BoltIdentifier) {
throw new ParseError(t0, [SyntaxKind.BoltNewTypeKeyword])
}
}
if (t0.text !== 'newtype') {
throw new ParseError(t0, [SyntaxKind.NewTypeKeyword])
throw new ParseError(t0, [SyntaxKind.BoltNewTypeKeyword])
}
const name = tokens.get();
if (name.kind !== SyntaxKind.Identifier) {
throw new ParseError(name, [SyntaxKind.Identifier])
if (name.kind !== SyntaxKind.BoltIdentifier) {
throw new ParseError(name, [SyntaxKind.BoltIdentifier])
}
return new NewTypeDecl(isPublic, name)
@ -426,8 +436,8 @@ export class Parser {
let isPublic = false;
const k0 = tokens.peek();
if (k0.kind !== SyntaxKind.Identifier) {
throw new ParseError(k0, [SyntaxKind.PubKeyword, SyntaxKind.ForeignKeyword, SyntaxKind.FnKeyword])
if (k0.kind !== SyntaxKind.BoltIdentifier) {
throw new ParseError(k0, [SyntaxKind.BoltPubKeyword, SyntaxKind.BoltForeignKeyword, SyntaxKind.BoltFnKeyword])
}
if (k0.text === 'pub') {
tokens.get();
@ -435,20 +445,20 @@ export class Parser {
}
const k1 = tokens.peek();
if (k1.kind !== SyntaxKind.Identifier) {
throw new ParseError(k1, [SyntaxKind.ForeignKeyword, SyntaxKind.FnKeyword])
if (k1.kind !== SyntaxKind.BoltIdentifier) {
throw new ParseError(k1, [SyntaxKind.BoltForeignKeyword, SyntaxKind.BoltFnKeyword])
}
if (k1.text === 'foreign') {
tokens.get();
const l1 = tokens.get();
if (l1.kind !== SyntaxKind.StringLiteral) {
throw new ParseError(l1, [SyntaxKind.StringLiteral])
if (l1.kind !== SyntaxKind.BoltStringLiteral) {
throw new ParseError(l1, [SyntaxKind.BoltStringLiteral])
}
target = l1.value;
}
const k2 = tokens.get();
if (k2.kind !== SyntaxKind.Identifier || k2.text !== 'fn') {
throw new ParseError(k2, [SyntaxKind.FnKeyword])
if (k2.kind !== SyntaxKind.BoltIdentifier || k2.text !== 'fn') {
throw new ParseError(k2, [SyntaxKind.BoltFnKeyword])
}
let name: QualName;
@ -462,37 +472,37 @@ export class Parser {
const t1 = tokens.peek(2);
const isParamLike = (token: Token) =>
token.kind === SyntaxKind.Identifier || token.kind === SyntaxKind.Parenthesized;
token.kind === SyntaxKind.BoltIdentifier || token.kind === SyntaxKind.BoltParenthesized;
const parseParamLike = (tokens: TokenStream) => {
const t0 = tokens.peek(1);
if (t0.kind === SyntaxKind.Identifier) {
if (t0.kind === SyntaxKind.BoltIdentifier) {
tokens.get();
return new Param(new BindPatt(t0, null, t0), null, null, null, t0)
} else if (t0.kind === SyntaxKind.Parenthesized) {
} else if (t0.kind === SyntaxKind.BoltParenthesized) {
tokens.get();
const innerTokens = t0.toTokenStream();
const param = this.parseParam(innerTokens)
this.assertEmpty(innerTokens);
return param
} else {
throw new ParseError(t0, [SyntaxKind.Identifier, SyntaxKind.Parenthesized])
throw new ParseError(t0, [SyntaxKind.BoltIdentifier, SyntaxKind.BoltParenthesized])
}
}
if (t0.kind === SyntaxKind.Operator) {
if (t0.kind === SyntaxKind.BoltOperator) {
name = new QualName(t0, [], null, t0);
tokens.get();
params.push(parseParamLike(tokens))
} else if (isParamLike(t0) && t1.kind == SyntaxKind.Operator) {
} else if (isParamLike(t0) && t1.kind == SyntaxKind.BoltOperator) {
params.push(parseParamLike(tokens));
name = new QualName(t1, [], null, t1);
while (true) {
const t2 = tokens.peek();
if (t2.kind !== SyntaxKind.Operator) {
if (t2.kind !== SyntaxKind.BoltOperator) {
break;
}
if (t2.text !== t1.text) {
@ -502,39 +512,39 @@ export class Parser {
params.push(parseParamLike(tokens))
}
} else if (t0.kind === SyntaxKind.Identifier) {
} else if (t0.kind === SyntaxKind.BoltIdentifier) {
name = this.parseQualName(tokens)
const t2 = tokens.get();
if (t2.kind === SyntaxKind.Parenthesized) {
if (t2.kind === SyntaxKind.BoltParenthesized) {
const innerTokens = t2.toTokenStream();
while (true) {
const t3 = innerTokens.peek();
if (t3.kind === SyntaxKind.EOS) {
if (t3.kind === SyntaxKind.BoltEOS) {
break;
}
params.push(this.parseParam(innerTokens))
const t4 = innerTokens.get();
if (t4.kind === SyntaxKind.Comma) {
if (t4.kind === SyntaxKind.BoltComma) {
continue;
} else if (t4.kind === SyntaxKind.EOS) {
} else if (t4.kind === SyntaxKind.BoltEOS) {
break;
} else {
throw new ParseError(t4, [SyntaxKind.Comma, SyntaxKind.EOS])
throw new ParseError(t4, [SyntaxKind.BoltComma, SyntaxKind.BoltEOS])
}
}
}
} else {
throw new ParseError(t0, [SyntaxKind.Identifier, SyntaxKind.Operator, SyntaxKind.Parenthesized])
throw new ParseError(t0, [SyntaxKind.BoltIdentifier, SyntaxKind.BoltOperator, SyntaxKind.BoltParenthesized])
}
// Parse return type
const t2 = tokens.peek();
if (t2.kind === SyntaxKind.RArrow) {
if (t2.kind === SyntaxKind.BoltRArrow) {
tokens.get();
returnType = this.parseTypeDecl(tokens);
}
@ -542,7 +552,7 @@ export class Parser {
// Parse function body
const t3 = tokens.peek();
if (t3.kind === SyntaxKind.Braced) {
if (t3.kind === SyntaxKind.BoltBraced) {
tokens.get();
switch (target) {
case "Bolt":
@ -562,21 +572,23 @@ export class Parser {
parseSourceElement(tokens: TokenStream): SourceElement {
const t0 = tokens.peek(1);
if (t0.kind === SyntaxKind.Identifier) {
if (t0.kind === SyntaxKind.BoltIdentifier) {
let i = 1;
let kw: Token = t0;
if (t0.text === 'pub') {
i++;
kw = tokens.peek(i);
if (kw.kind !== SyntaxKind.Identifier) {
throw new ParseError(kw, [SyntaxKind.ForeignKeyword, SyntaxKind.ModKeyword, SyntaxKind.LetKeyword, SyntaxKind.FnKeyword, SyntaxKind.EnumKeyword, SyntaxKind.StructKeyword])
if (kw.kind !== SyntaxKind.BoltIdentifier) {
throw new ParseError(kw, [SyntaxKind.BoltForeignKeyword, SyntaxKind.BoltModKeyword,
SyntaxKind.BoltLetKeyword, SyntaxKind.BoltFnKeyword, SyntaxKind.BoltEnumKeyword, SyntaxKind.BoltStructKeyword])
}
}
if (t0.text === 'foreign') {
i += 2;
kw = tokens.peek(i);
if (kw.kind !== SyntaxKind.Identifier) {
throw new ParseError(kw, [SyntaxKind.ModKeyword, SyntaxKind.LetKeyword, SyntaxKind.FnKeyword, SyntaxKind.EnumKeyword, SyntaxKind.StructKeyword])
if (kw.kind !== SyntaxKind.BoltIdentifier) {
throw new ParseError(kw, [SyntaxKind.BoltModKeyword, SyntaxKind.BoltLetKeyword,
SyntaxKind.BoltFnKeyword, SyntaxKind.BoltEnumKeyword, SyntaxKind.BoltStructKeyword])
}
}
switch (kw.text) {
@ -599,7 +611,8 @@ export class Parser {
return this.parseExpr(tokens)
} catch (e) {
if (e instanceof ParseError) {
throw new ParseError(kw, [...e.expected, SyntaxKind.ModKeyword, SyntaxKind.LetKeyword, SyntaxKind.FnKeyword, SyntaxKind.EnumKeyword, SyntaxKind.StructKeyword])
throw new ParseError(kw, [...e.expected, SyntaxKind.BoltModKeyword, SyntaxKind.BoltLetKeyword,
SyntaxKind.BoltFnKeyword, SyntaxKind.BoltEnumKeyword, SyntaxKind.BoltStructKeyword])
} else {
throw e;
}
@ -613,7 +626,7 @@ export class Parser {
getOperatorDesc(seekArity: number, seekName: string): OperatorInfo {
for (let i = 0; i < this.operatorTable.length; ++i) {
for (const [kind, arity, name] of this.operatorTable[i]) {
if (artity == seekArity && name === seekName) {
if (arity == seekArity && name === seekName) {
return {
kind,
name,
@ -628,7 +641,7 @@ export class Parser {
parseBinOp(tokens: TokenStream, lhs: Expr , minPrecedence: number) {
let lookahead = tokens.peek(1);
while (true) {
if (lookahead.kind !== SyntaxKind.Operator) {
if (lookahead.kind !== SyntaxKind.BoltOperator) {
break;
}
const lookaheadDesc = this.getOperatorDesc(2, lookahead.text);
@ -657,23 +670,23 @@ export class Parser {
const args: Expr[] = []
const t2 = tokens.get();
if (t2.kind !== SyntaxKind.Parenthesized) {
throw new ParseError(t2, [SyntaxKind.Parenthesized])
if (t2.kind !== SyntaxKind.BoltParenthesized) {
throw new ParseError(t2, [SyntaxKind.BoltParenthesized])
}
const innerTokens = t2.toTokenStream();
while (true) {
const t3 = innerTokens.peek();
if (t3.kind === SyntaxKind.EOS) {
if (t3.kind === SyntaxKind.BoltEOS) {
break;
}
args.push(this.parseExpr(innerTokens))
const t4 = innerTokens.get();
if (t4.kind === SyntaxKind.EOS) {
if (t4.kind === SyntaxKind.BoltEOS) {
break
} else if (t4.kind !== SyntaxKind.Comma){
throw new ParseError(t4, [SyntaxKind.Comma])
} else if (t4.kind !== SyntaxKind.BoltComma){
throw new ParseError(t4, [SyntaxKind.BoltComma])
}
}

View file

@ -1,7 +1,6 @@
import * as path from "path"
import * as fs from "fs-extra"
import * as crypto from "crypto"
import { Parser } from "./parser"
import { TypeChecker } from "./checker"
@ -10,12 +9,15 @@ import { Expander } from "./expander"
import { Scanner } from "./scanner"
import { Compiler } from "./compiler"
import { emit } from "./emitter"
import { TextFile, SourceFile, AnySourceFile } from "./ast"
import { upsearchSync, FastStringMap } from "./util"
import { TextFile } from "./text"
import { BoltSourceFile, Syntax } from "./ast"
import { upsearchSync, FastStringMap, getFileStem, getLanguage } from "./util"
import { Package } from "./package"
const targetExtensions: FastStringMap<string> = {
'JS': '.mjs'
'JS': '.mjs',
'Bolt': '.bolt',
'C': '.c',
};
export class Program {
@ -25,7 +27,7 @@ export class Program {
public checker: TypeChecker;
public expander: Expander;
private sourceFiles = new Map<string, SourceFile>();
private sourceFiles = new Map<string, BoltSourceFile>();
private packages: FastStringMap<Package> = Object.create(null);
constructor(files: TextFile[]) {
@ -34,6 +36,7 @@ export class Program {
this.evaluator = new Evaluator(this.checker);
this.expander = new Expander(this.parser, this.evaluator, this.checker);
for (const file of files) {
console.log(`Loading ${file.origPath} ...`);
const contents = fs.readFileSync(file.fullPath, 'utf8');
const scanner = new Scanner(file, contents)
this.sourceFiles.set(file.fullPath, scanner.scan());
@ -66,20 +69,21 @@ export class Program {
if (pkg !== null) {
}
fs.writeFileSync(this.mapToTargetFile(rootNode), emit(node), 'utf8');
fs.mkdirp('.bolt-work');
fs.writeFileSync(this.mapToTargetFile(rootNode), emit(rootNode), 'utf8');
}
}
private mapToTargetFile(node: AnySourceFile) {
getFileStem(node.span.file.fullPath) + '.' + getDefaultExtension(getTargetLanguage(node.kind));
private mapToTargetFile(node: Syntax) {
return path.join('.bolt-work', getFileStem(node.span!.file.fullPath) + getDefaultExtension(getLanguage(node)));
}
eval(file: TextFile) {
const original = this.sourceFiles.get(file);
eval(filename: string) {
const original = this.sourceFiles.get(filename);
if (original === undefined) {
throw new Error(`File ${file.path} does not seem to be part of this Program.`)
throw new Error(`File ${filename} does not seem to be part of this Program.`)
}
const expanded = this.expander.getFullyExpanded(original) as SourceFile;
const expanded = this.expander.getFullyExpanded(original) as BoltSourceFile;
return this.evaluator.eval(expanded)
}
@ -92,8 +96,3 @@ function getDefaultExtension(target: string) {
return targetExtensions[target];
}
function getFileStem(filepath: string): string {
const chunks = path.basename(filepath).split('.')
return chunks[chunks.length-1];
}

View file

@ -2,31 +2,39 @@
import XRegExp from "xregexp"
import {
SyntaxKind,
TextFile,
TextPos,
TextSpan,
Identifier,
RArrow,
Operator,
PunctType,
Token,
Decl,
Parenthesized,
Braced,
Bracketed,
Sentence,
SourceFile,
Semi,
Comma,
StringLiteral,
IntegerLiteral,
Colon,
EOS,
Dot,
EqSign,
} from "./text"
import {
SyntaxKind,
BoltToken,
BoltSentence,
createBoltSentence,
createBoltIdentifier,
createBoltRArrow,
createBoltOperator,
createBoltParenthesized,
createBoltBraced,
createBoltBracketed,
createBoltSourceFile,
createBoltSemi,
createBoltComma,
createBoltStringLiteral,
createBoltIntegerLiteral,
createBoltColon,
createBoltEOS,
createBoltDot,
createBoltEqSign,
} from "./ast"
/**
 * The three families of bracketing punctuation the scanner distinguishes
 * when it groups a delimited run of text into a single token.
 */
export enum PunctType {
  /** Parentheses. */
  Paren = 0,
  /** Square brackets. */
  Bracket = 1,
  /** Curly braces. */
  Brace = 2,
}
function escapeChar(ch: string) {
switch (ch) {
case '\a': return '\\a';
@ -90,14 +98,10 @@ function isOpenPunct(ch: string) {
class ScanError extends Error {
constructor(public file: TextFile, public position: TextPos, public char: string) {
super(`${file.path}:${position.line}:${position.column}: unexpected char '${escapeChar(char)}'`)
super(`${file.origPath}:${position.line}:${position.column}: unexpected char '${escapeChar(char)}'`)
}
}
interface Stream<T> {
read(): T
}
/**
 * Tells whether `ch` contains a character from the Unicode category
 * "decimal number" (Nd). The pattern is not anchored, so any matching
 * character anywhere in `ch` makes the result true.
 */
function isDigit(ch: string) {
  const decimalDigit = XRegExp('\\p{Nd}');
  return decimalDigit.test(ch);
}
@ -132,7 +136,7 @@ const EOF = ''
export class Scanner {
protected buffer: string[] = [];
protected scanned: Token[] = [];
protected scanned: BoltToken[] = [];
protected currPos: TextPos;
protected offset = 0;
@ -188,7 +192,7 @@ export class Scanner {
return text;
}
scanToken(): Token {
scanToken(): BoltToken {
while (true) {
@ -202,25 +206,25 @@ export class Scanner {
const startPos = this.currPos.clone()
if (c0 == EOF) {
return new EOS(new TextSpan(this.file, startPos, startPos));
return createBoltEOS(new TextSpan(this.file, startPos, startPos));
}
switch (c0) {
case '.':
this.getChar();
return new Dot(new TextSpan(this.file, startPos, this.currPos.clone()));
return createBoltDot(new TextSpan(this.file, startPos, this.currPos.clone()));
case '=':
this.getChar();
return new EqSign(new TextSpan(this.file, startPos, this.currPos.clone()));
return createBoltEqSign(new TextSpan(this.file, startPos, this.currPos.clone()));
case ';':
this.getChar();
return new Semi(new TextSpan(this.file, startPos, this.currPos.clone()));
return createBoltSemi(new TextSpan(this.file, startPos, this.currPos.clone()));
case ',':
this.getChar();
return new Comma(new TextSpan(this.file, startPos, this.currPos.clone()));
return createBoltComma(new TextSpan(this.file, startPos, this.currPos.clone()));
case ':':
this.getChar();
return new Colon(new TextSpan(this.file, startPos, this.currPos.clone()));
return createBoltColon(new TextSpan(this.file, startPos, this.currPos.clone()));
}
if (c0 === '"') {
@ -245,13 +249,13 @@ export class Scanner {
const endPos = this.currPos.clone();
return new StringLiteral(text, new TextSpan(this.file, startPos, endPos))
return createBoltStringLiteral(text, new TextSpan(this.file, startPos, endPos))
} else if (isDigit(c0)) {
const digits = this.takeWhile(isDigit)
const endPos = this.currPos.clone();
return new IntegerLiteral(BigInt(digits), new TextSpan(this.file, startPos, endPos));
return createBoltIntegerLiteral(BigInt(digits), new TextSpan(this.file, startPos, endPos));
} else if (isOpenPunct(c0)) {
@ -287,11 +291,11 @@ export class Scanner {
switch (punctType) {
case PunctType.Brace:
return new Braced(text, new TextSpan(this.file, startPos, endPos));
return createBoltBraced(text, new TextSpan(this.file, startPos, endPos));
case PunctType.Paren:
return new Parenthesized(text, new TextSpan(this.file, startPos, endPos));
return createBoltParenthesized(text, new TextSpan(this.file, startPos, endPos));
case PunctType.Bracket:
return new Bracketed(text, new TextSpan(this.file, startPos, endPos));
return createBoltBracketed(text, new TextSpan(this.file, startPos, endPos));
default:
throw new Error("Got an invalid state.")
}
@ -300,7 +304,7 @@ export class Scanner {
const name = this.takeWhile(isIdentPart);
const endPos = this.currPos.clone();
return new Identifier(name, new TextSpan(this.file, startPos, endPos))
return createBoltIdentifier(name, new TextSpan(this.file, startPos, endPos))
} else if (isOperatorStart(c0)) {
@ -309,9 +313,9 @@ export class Scanner {
const span = new TextSpan(this.file, startPos, endPos);
if (text === '->') {
return new RArrow(span);
return createBoltRArrow(span);
} else {
return new Operator(text, span);
return createBoltOperator(text, span);
}
} else {
@ -324,14 +328,14 @@ export class Scanner {
}
peek(count = 1): Token {
peek(count = 1): BoltToken {
while (this.scanned.length < count) {
this.scanned.push(this.scanToken());
}
return this.scanned[count - 1];
}
get(): Token {
get(): BoltToken {
return this.scanned.length > 0
? this.scanned.shift()!
: this.scanToken();
@ -339,33 +343,33 @@ export class Scanner {
scanTokens() {
const elements: Sentence[] = []
const elements: BoltSentence[] = []
outer: while (true) {
const tokens: Token[] = [];
const tokens: BoltToken[] = [];
inner: while (true) {
const token = this.scanToken();
if (token.kind === SyntaxKind.EOS) {
if (token.kind === SyntaxKind.BoltEOS) {
if (tokens.length === 0) {
break outer;
} else {
break inner;
}
}
if (token.kind === SyntaxKind.Semi) {
if (token.kind === SyntaxKind.BoltSemi) {
break;
}
tokens.push(token)
if (token.kind === SyntaxKind.Braced) {
if (token.kind === SyntaxKind.BoltBraced) {
break;
}
}
if (tokens.length > 0) {
elements.push(
new Sentence(
createBoltSentence(
tokens,
new TextSpan(this.file, tokens[0].span!.start.clone(), tokens[tokens.length-1].span!.end.clone())
)
@ -381,7 +385,7 @@ export class Scanner {
const startPos = this.currPos.clone();
const elements = this.scanTokens();
const endPos = this.currPos.clone();
return new SourceFile(elements, new TextSpan(this.file, startPos, endPos));
return createBoltSourceFile(elements, new TextSpan(this.file, startPos, endPos));
}
}

47
src/text.ts Normal file
View file

@ -0,0 +1,47 @@
import * as path from "path"
/**
 * A source text file, identified by the path it was originally given as.
 */
export class TextFile {

  /** The path exactly as it was passed to the constructor. */
  public origPath: string;

  constructor(origPath: string) {
    this.origPath = origPath;
  }

  /**
   * The absolute form of `origPath`, resolved on every access against the
   * current working directory of the process.
   */
  get fullPath() {
    const absolutePath = path.resolve(this.origPath);
    return absolutePath;
  }

}
/**
 * A position inside a text, stored as an offset together with a
 * line/column pair.
 */
export class TextPos {

  public offset: number;
  public line: number;
  public column: number;

  constructor(offset: number, line: number, column: number) {
    this.offset = offset;
    this.line = line;
    this.column = column;
  }

  /**
   * Creates an independent position carrying the same offset, line and column.
   */
  clone() {
    const { offset, line, column } = this;
    return new TextPos(offset, line, column);
  }

}
/**
 * A region of a text file, delimited by a start and an end position.
 */
export class TextSpan {

  public file: TextFile;
  public start: TextPos;
  public end: TextPos;

  constructor(file: TextFile, start: TextPos, end: TextPos) {
    this.file = file;
    this.start = start;
    this.end = end;
  }

  /**
   * Creates a new span over the same file. The file object is shared with the
   * original, while the start and end positions are cloned.
   */
  clone() {
    const { file, start, end } = this;
    return new TextSpan(file, start.clone(), end.clone());
  }

}

View file

@ -7,14 +7,14 @@ const nodeProto = {
}
}
/**
 * Reports whether `value` is a non-null object on which `__NODE_TYPE` is
 * defined, which is how nodes produced by `createNode` are recognised.
 */
function isSyntax(value) {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  return value.__NODE_TYPE !== undefined;
}
function createNode(nodeType) {
const obj = Object.create(nodeProto);
Object.defineProperty(obj, '__IS_NODE', {
enumerable: false,
writable: false,
configurable: true,
value: true,
});
Object.defineProperty(obj, '__NODE_TYPE', {
enumerable: false,
writable: false,
@ -23,9 +23,8 @@ function createNode(nodeType) {
});
Object.defineProperty(obj, 'kind', {
enumerable: false,
writable: false,
configurable: true,
getter() {
get() {
return this.__NODE_TYPE.index;
}
});
@ -35,6 +34,7 @@ function createNode(nodeType) {
}
for (const nodeName of Object.keys(NODE_TYPES)) {
exported[`create${nodeName}`] = function (...args) {
const nodeType = NODE_TYPES[nodeName];
const node = createNode(nodeType);
@ -67,6 +67,7 @@ for (const nodeName of Object.keys(NODE_TYPES)) {
}
return node;
}
}
if (typeof module !== 'undefined') {

328
src/treegen/index.ts Normal file
View file

@ -0,0 +1,328 @@
import * as fs from "fs"
import * as path from "path"
const PACKAGE_ROOT = path.resolve(__dirname, '..', '..');
import { Syntax, Declaration, NodeDeclaration, TypeDeclaration, EnumDeclaration, TypeNode, NodeField } from "./ast"
import { FastStringMap } from "../util"
import { FileWriter } from "./util"
/**
 * Generates the JavaScript implementation and the matching TypeScript
 * declarations for the AST described by `decls`.
 *
 * A "leaf" node is a node declaration that no other node declaration lists
 * as a parent. Only leaf nodes get an entry in `NODE_TYPES`, a `SyntaxKind`
 * member and a `create…` factory; every other node declaration becomes a
 * union type of the leaf nodes below it.
 *
 * @param decls The declarations parsed from the AST definition file(s). Each
 *   declaration is mutated: its `index` is set to its position in this array.
 * @returns The generated JavaScript source in `jsFile` and the generated
 *   TypeScript declarations in `dtsFile`.
 * @throws Error when a parent or child name does not resolve to a node
 *   declaration, or when a field type cannot be mapped to a TypeScript type.
 */
export function generateAST(decls: Declaration[]) {

  const jsFile = new FileWriter();
  const dtsFile = new FileWriter();

  // Sort declarations by category
  const nodeDecls: NodeDeclaration[] = decls.filter(decl => decl.type === 'NodeDeclaration') as NodeDeclaration[];
  const langNames: string[] = decls.filter(decl => decl.type === 'LanguageDeclaration').map(decl => decl.name);

  // Number every declaration by its position in the input and make it
  // retrievable by name. The map has no prototype, so `in` only sees real names.
  const declByName: FastStringMap<Declaration> = Object.create(null);
  decls.forEach((decl, index) => {
    decl.index = index;
    declByName[decl.name] = decl;
  });

  // Generate a mapping from parent node to child node
  // This makes it easy to generate union types for the intermediate nodes.
  const childrenOf: FastStringMap<string[]> = Object.create(null);
  for (const nodeDecl of nodeDecls) {
    for (const parentName of nodeDecl.parents) {
      if (childrenOf[parentName] === undefined) {
        childrenOf[parentName] = [];
      }
      childrenOf[parentName].push(nodeDecl.name);
    }
  }

  // After we're done mapping parents to children, we can use isLeafNode()
  // to store the nodes we will be iterating most frequently on.
  const leafNodes: NodeDeclaration[] = nodeDecls.filter(decl => isLeafNode(decl.name));

  // Write a JavaScript file that contains all AST definitions.

  jsFile.write(`\nconst NODE_TYPES = {\n`);
  jsFile.indent();
  for (const decl of leafNodes) {
    jsFile.write(`'${decl.name}': {\n`);
    jsFile.indent();
    jsFile.write(`index: ${decl.index},\n`);
    jsFile.write(`fields: new Map([\n`);
    jsFile.indent();
    for (const field of getAllFields(decl)) {
      jsFile.write(`['${field.name}', ${JSON.stringify(jsonify(field.typeNode))}],\n`);
    }
    jsFile.dedent();
    jsFile.write(']),\n');
    jsFile.dedent();
    jsFile.write('},\n');
  }
  jsFile.dedent();
  jsFile.write('};\n\n');

  // The hand-written runtime support code is pasted in verbatim.
  jsFile.write(fs.readFileSync(path.join(PACKAGE_ROOT, 'src', 'treegen', 'ast-template.js'), 'utf8'));

  jsFile.write(`exported.kindToString = function (kind) {\n switch (kind) {\n`);
  jsFile.indent(2);
  for (const leafNode of leafNodes) {
    jsFile.write(`case ${leafNode.index}: return '${leafNode.name}';\n`);
  }
  jsFile.dedent(2);
  jsFile.write(` }\n}\n\n`);

  // One is<Name>() predicate per node declaration. A leaf compares against its
  // own index, any other node against the indices of all leaves below it.
  for (const decl of nodeDecls) {
    jsFile.write(`exported.is${decl.name} = function (value) {\n`);
    jsFile.indent();
    jsFile.write(`if (!isSyntax(value)) {\n return false;\n}\n`);
    if (isLeafNode(decl.name)) {
      jsFile.write(` return value.kind === ${decl.index};\n`);
    } else {
      jsFile.write('return ' + getAllChildren(decl).map(d => `value.kind === ${d.index}`).join(' || ') + '\n');
    }
    jsFile.dedent();
    jsFile.write(`}\n`);
  }

  jsFile.write(`\nif (typeof module !== 'undefined') {\n module.exports = exported;\n}\n\n`)

  // Write corresponding TypeScript declarations

  dtsFile.write(`\nexport const enum SyntaxKind {\n`);
  for (const decl of leafNodes) {
    dtsFile.write(` ${decl.name} = ${decl.index},\n`);
  }
  dtsFile.write(`}\n\n`);

  dtsFile.write(`
import { TextSpan } from "./text"
export type SyntaxRange = [Syntax, Syntax];
interface SyntaxBase {
kind: SyntaxKind;
parentNode: Syntax | null;
span: TextSpan | null;
origNodes: SyntaxRange | null;
}
`);

  for (const decl of decls) {
    if (decl.type === 'NodeDeclaration') {
      if (isLeafNode(decl.name)) {
        dtsFile.write(`export interface ${decl.name} extends SyntaxBase {\n`)
        dtsFile.indent()
        dtsFile.write(`kind: SyntaxKind.${decl.name};\n`);
        for (const field of getAllFields(decl)) {
          dtsFile.write(`${field.name}: ${emitTypeScriptType(field.typeNode)};\n`);
        }
        dtsFile.dedent();
        dtsFile.write(`}\n\n`);
      } else {
        dtsFile.write(`export type ${decl.name}\n`);
        writeUnionMembers(getAllChildren(decl).map(childDecl => childDecl.name));
      }
    } else if (decl.type === 'EnumDeclaration') {
      dtsFile.write(`export const enum ${decl.name} {\n`);
      dtsFile.indent();
      for (const field of decl.fields) {
        // Terminate every member with a newline; without it all members and
        // the closing brace end up on one unindented line.
        dtsFile.write(`${field.name} = ${field.value},\n`);
      }
      dtsFile.dedent();
      dtsFile.write('}\n\n');
    }
  }

  // One <Lang>Syntax union per declared language, made of the leaf nodes
  // whose name starts with the language name.
  for (const langName of langNames) {
    dtsFile.write(`export type ${langName}Syntax\n`);
    writeUnionMembers(leafNodes.filter(decl => decl.name.startsWith(langName)).map(decl => decl.name));
  }

  dtsFile.write(`export type Syntax\n`);
  writeUnionMembers(leafNodes.map(decl => decl.name));

  dtsFile.write('export function kindToString(kind: SyntaxKind): string;\n\n');

  for (const decl of leafNodes) {
    dtsFile.write(`export function create${decl.name}(`);
    for (const field of getAllFields(decl)) {
      dtsFile.write(`${field.name}: ${emitTypeScriptType(field.typeNode)}, `);
    }
    dtsFile.write(`span?: TextSpan | null, origNodes?: SyntaxRange | null): ${decl.name};\n`);
  }

  dtsFile.write('\n');

  for (const decl of nodeDecls) {
    dtsFile.write(`export function is${decl.name}(value: any): value is ${decl.name};\n`);
  }

  return {
    jsFile: jsFile.currentText,
    dtsFile: dtsFile.currentText,
  };

  // Below are some useful functions

  /**
   * Writes the right-hand side of a union type alias to the declaration file:
   * one indented member per line, the first introduced by `=` and the others
   * by `|`, followed by a blank separator. With no names nothing but the
   * separator is written.
   */
  function writeUnionMembers(names: string[]): void {
    dtsFile.indent();
    names.forEach((name, position) => {
      dtsFile.write((position === 0 ? '=' : '|') + ' ' + name + '\n');
    });
    dtsFile.dedent();
    dtsFile.write('\n\n');
  }

  /** Tells whether any declaration (of any category) carries this name. */
  function hasDeclarationNamed(name: string): boolean {
    return name in declByName;
  }

  /**
   * Translates a type node of the definition language into TypeScript source.
   * Names of known declarations are emitted unchanged; `Option<T>` becomes
   * `T | null`, `Vec<T>` becomes `T[]`, and `String`, `Int`, `usize` and
   * `bool` become `string`, `bigint`, `number` and `boolean`.
   */
  function emitTypeScriptType(typeNode: TypeNode): string {
    if (typeNode.type === 'ReferenceTypeNode') {
      if (hasDeclarationNamed(typeNode.name)) {
        return typeNode.name;
      } else if (typeNode.name === 'Option') {
        return `${emitTypeScriptType(typeNode.typeArgs[0])} | null`;
      } else if (typeNode.name === 'Vec') {
        return `${emitTypeScriptType(typeNode.typeArgs[0])}[]`;
      } else if (typeNode.name === 'String') {
        return `string`;
      } else if (typeNode.name === 'Int') {
        return `bigint`;
      } else if (typeNode.name === 'usize') {
        return `number`;
      } else if (typeNode.name === 'bool') {
        return `boolean`;
      } else {
        throw new Error(`Could not emit TypeScript type for reference type node named ${typeNode.name}`);
      }
    } else if (typeNode.type === 'UnionTypeNode') {
      return typeNode.elements.map(emitTypeScriptType).join(' | ');
    }
    // Report the node's kind; interpolating the node itself would only print "[object Object]".
    throw new Error(`Could not emit TypeScript type for type node of kind '${(typeNode as TypeNode).type}'`);
  }

  /**
   * Collects every leaf node found below `nodeDecl`. A declaration without
   * children yields itself.
   */
  function getAllChildren(nodeDecl: NodeDeclaration): NodeDeclaration[] {
    const out: NodeDeclaration[] = [];
    const childNames = childrenOf[nodeDecl.name];
    if (childNames === undefined || childNames.length === 0) {
      out.push(nodeDecl);
    } else {
      for (const childName of childNames) {
        const childDecl = getDeclarationNamed(childName)
        if (childDecl.type !== 'NodeDeclaration') {
          throw new Error(`Node ${nodeDecl.name} has a child named '${childDecl.name}' that is not a node.`);
        }
        pushAll(out, getAllChildren(childDecl));
      }
    }
    return out;
  }

  /**
   * Lists the fields of `nodeDecl`: its own fields first, followed by the
   * fields of each parent, gathered recursively in declaration order.
   */
  function getAllFields(nodeDecl: NodeDeclaration) {
    let out: NodeField[] = [];
    pushAll(out, nodeDecl.fields);
    for (const parentName of nodeDecl.parents) {
      const parentDecl = getDeclarationNamed(parentName);
      if (parentDecl.type !== 'NodeDeclaration') {
        throw new Error(`Parent declaration '${parentName}' of '${nodeDecl.name}' must be a node declaration.`);
      }
      pushAll(out, getAllFields(parentDecl));
    }
    return out;
  }

  /** Looks a declaration up by name and throws when there is none. */
  function getDeclarationNamed(name: string): Declaration {
    const decl = declByName[name];
    if (decl === undefined) {
      throw new Error(`Declaration '${name}' was not found in any of the definition files.`);
    }
    return decl;
  }

  /**
   * Tells whether `name` refers to a node declaration that has no children.
   * Declarations that are not nodes are never leaves; unknown names throw.
   */
  function isLeafNode(name: string): boolean {
    const decl = getDeclarationNamed(name);
    if (decl.type !== 'NodeDeclaration') {
      return false;
    }
    return childrenOf[name] === undefined || childrenOf[name].length === 0;
  }

}
/**
 * Appends every element of `els` to the end of `arr`, in order, mutating
 * `arr` in place.
 */
function pushAll<T>(arr: T[], els: T[]): void {
  for (let position = 0; position < els.length; position++) {
    arr.push(els[position]);
  }
}
/**
 * Type guard for nodes: only non-null objects qualify, and for those the
 * answer is whatever their `__IS_NODE` property holds.
 */
function isNode(value: any): value is Syntax {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  return value.__IS_NODE;
}
/**
 * Converts a node tree into plain objects. For every node (as recognised by
 * `isNode`) a fresh object is built from its enumerable own keys, leaving out
 * `type`, `span` and `__IS_NODE`; array-valued fields are converted element
 * by element. Anything that is not a node is returned untouched.
 */
function jsonify(value: any) {

  // Bookkeeping properties that must not show up in the result.
  const skippedKeys = ['type', 'span', '__IS_NODE'];

  function convert(candidate: any) {
    return isNode(candidate) ? convertNode(candidate) : candidate;
  }

  function convertNode(node: any) {
    const result: any = {};
    for (const key of Object.keys(node)) {
      if (skippedKeys.includes(key)) {
        continue;
      }
      const fieldValue = node[key];
      result[key] = Array.isArray(fieldValue)
        ? fieldValue.map(convert)
        : convert(fieldValue);
    }
    return result;
  }

  return convert(value);
}
/**
 * Returns `str` without a trailing `suffix`. Strings that do not end with
 * `suffix` come back unchanged.
 */
function stripSuffix(str: string, suffix: string): string {
  return str.endsWith(suffix)
    ? str.slice(0, str.length - suffix.length)
    : str;
}
/**
 * Returns the part of the file name (directories removed) that precedes the
 * first dot. A name without a dot is returned whole.
 */
function getFileStem(filepath: string) {
  const [stem] = path.basename(filepath).split('.');
  return stem;
}

View file

@ -26,6 +26,7 @@ Declaration
= NodeDeclaration
/ EnumDeclaration
/ TypeDeclaration
/ LanguageDeclaration
NodeDeclaration
= NodeToken __ name:Identifier parents:(__ '>' __ @ExtendsList)? fields:(__ '{' __ @(@NodeField __)* '}')? EOS {
@ -37,6 +38,11 @@ ExtendsList
return [head, ...tail];
}
// A language directive: '@' followed by the 'language' keyword and an
// identifier (e.g. the `@language Bolt;` line of the AST definition file).
// Produces a 'LanguageDeclaration' node that carries only the language name.
LanguageDeclaration
= '@' LanguageToken __ name:Identifier EOS {
return createNode('LanguageDeclaration', { name });
}
NodeField
= name:Identifier __ ':' __ typeNode:TypeNode EOD {
return createNode('NodeField', { name, typeNode });
@ -98,6 +104,7 @@ HexInteger
EnumToken = 'enum' !IdentifierPart
NodeToken = 'node' !IdentifierPart
TypeToken = 'type' !IdentifierPart
LanguageToken = 'language' !IdentifierPart
SourceCharacter
= .

49
src/treegen/util.ts Normal file
View file

@ -0,0 +1,49 @@
/**
 * Tells whether `ch` contains a carriage return, a tab or a space.
 * A line feed does not count as white space here.
 */
function isWhiteSpace(ch: string) {
  const blankPattern = /[\r\t ]/;
  return blankPattern.test(ch);
}
/**
 * Optional settings accepted by the FileWriter constructor.
 */
export interface FileWriterOptions {
/** Text repeated once per indentation column; FileWriter uses a single space when omitted. */
indentStr?: string;
/** Number of indentation levels in effect from the start; FileWriter uses 0 when omitted. */
startIndent?: number;
/** Number of `indentStr` repetitions that make up one level; FileWriter uses 2 when omitted. */
indentWidth?: number;
}
/**
 * Builds up text in `currentText` while keeping track of an indentation
 * depth. The indentation is inserted in front of the first non-blank
 * character of each line, so lines holding only white space are never padded.
 */
export class FileWriter {

  /** Everything written so far. */
  public currentText = '';

  // True until a character other than white space is seen on the current line.
  private atBlankLine = true;
  // Current indentation, counted in repetitions of `indentStr`.
  private currentIndent: number;
  private indentStr: string;
  private indentWidth: number;

  constructor(opts: FileWriterOptions = {}) {
    this.indentStr = opts.indentStr ?? ' ';
    this.indentWidth = opts.indentWidth ?? 2;
    const startLevel = opts.startIndent ?? 0;
    this.currentIndent = startLevel * this.indentWidth;
  }

  /** Deepens the indentation by `count` levels. */
  public indent(count = 1) {
    this.currentIndent = this.currentIndent + count * this.indentWidth;
  }

  /** Reduces the indentation by `count` levels. */
  public dedent(count = 1) {
    this.currentIndent = this.currentIndent - count * this.indentWidth;
  }

  /**
   * Appends `str` one character at a time. A line feed starts a new line;
   * the first character of a line that is not white space is preceded by the
   * current indentation. White space at the start of a line is still copied
   * through unchanged, ahead of that indentation.
   */
  public write(str: string) {
    for (const ch of str) {
      if (ch === '\n') {
        this.atBlankLine = true;
      } else if (this.atBlankLine && !isWhiteSpace(ch)) {
        this.currentText += this.indentStr.repeat(this.currentIndent);
        this.atBlankLine = false;
      }
      this.currentText += ch;
    }
  }

}

View file

@ -2,10 +2,54 @@
import * as path from "path"
import * as fs from "fs"
import { TextSpan } from "./text"
import { kindToString, Syntax, BoltToken, BoltQualName, BoltDeclaration, BoltDeclarationModifiers } from "./ast"
export type BoltTokenStream = Stream<BoltToken>;
export interface JsonArray extends Array<Json> { };
export interface JsonObject { [key: string]: Json }
export type Json = null | string | boolean | number | JsonArray | JsonObject;
export interface FastStringMap<T> {
[key: string]: T
}
// Language prefixes recognised in node kind names, tried in this order.
const supportedLanguages = ['Bolt', 'JS'];

/**
 * Determines which language a node belongs to by looking at the textual name
 * of its kind: the first supported language whose name is a prefix of the
 * kind name wins.
 *
 * @throws Error when the kind name starts with none of the supported languages.
 */
export function getLanguage(node: Syntax): string {
  const kindStr = kindToString(node.kind);
  const language = supportedLanguages.find(prefix => kindStr.startsWith(prefix));
  if (language === undefined) {
    throw new Error(`Could not determine the language of ${kindStr}`);
  }
  return language;
}
/**
 * Null-tolerant wrapper around `TextSpan.clone()`: `null` is passed through,
 * any other span is cloned.
 */
export function cloneSpan(span: TextSpan | null) {
  return span === null ? null : span.clone();
}
/**
 * Gives `node` a fresh span that runs from the start of `startNode`'s span to
 * the end of `endNode`'s span, in the file of `startNode`. Both nodes must
 * already have a span. Only `node.span` is assigned; `origNodes` is not touched.
 */
export function setOrigNodeRange(node: Syntax, startNode: Syntax, endNode: Syntax): void {
  const firstSpan = startNode.span!;
  const lastSpan = endNode.span!;
  node.span = new TextSpan(firstSpan.file, firstSpan.start.clone(), lastSpan.end.clone());
}
/**
 * Tells whether the `IsPublic` bit is set in the modifier flags of a declaration.
 */
export function hasPublicModifier(node: BoltDeclaration) {
  // NOTE(review): the AST definition file lists this enum's members as
  // `Mutable`, `Public` and `IsType` — confirm that `IsPublic` exists.
  const publicBits = node.modifiers & BoltDeclarationModifiers.IsPublic;
  return publicBits > 0;
}
/**
 * Renders a qualified name as dotted text: the text of every module path
 * element followed by a dot, then the text of the name itself.
 */
export function getFullTextOfQualName(node: BoltQualName) {
  const segments: string[] = [];
  for (const element of node.modulePath) {
    segments.push(element.text + '.');
  }
  return segments.join('') + node.name.text;
}
export interface Stream<T> {
get(): T;
peek(count?: number): T;
@ -51,51 +95,7 @@ export function upsearchSync(filename: string, startDir = '.') {
}
}
function isWhiteSpace(ch: string) {
return /[\r\t ]/.test(ch);
export function getFileStem(filepath: string): string {
return path.basename(filepath).split('.')[0];
}
export interface FileWriterOptions {
indentStr?: string;
startIndent?: number;
indentWidth?: number;
}
export class FileWriter {
public currentText = '';
private atBlankLine = true;
private currentIndent: number;
private indentStr: string;
private indentWidth: number;
constructor(opts: FileWriterOptions = {}) {
this.indentStr = opts.indentStr ?? ' ';
this.indentWidth = opts.indentWidth ?? 2;
this.currentIndent = (opts.startIndent ?? 0) * this.indentWidth;
}
public indent(count = 1) {
this.currentIndent += this.indentWidth * count;
}
public dedent(count = 1) {
this.currentIndent -= this.indentWidth * count;
}
public write(str: string) {
for (const ch of str) {
if (ch === '\n') {
this.atBlankLine = true;
} else if (!(this.atBlankLine && isWhiteSpace(ch))) {
if (this.atBlankLine) {
this.currentText += this.indentStr.repeat(this.currentIndent)
}
this.atBlankLine = false;
}
this.currentText += ch;
}
}
}