Update code

- Move foreign language interfaces to separate directories
 - Extend the JavaScript scanner and parser to be able to parse simple
   call expressions and member expressions
 - Fix multiple issues in expander.ts
 - Move shared scanning/parsing utilities to util.ts
This commit is contained in:
Sam Vervaeck 2020-05-22 21:29:14 +02:00
parent 70b219d6df
commit d1ba2cb540
8 changed files with 736 additions and 451 deletions

142
src/ast.d.ts vendored
View file

@ -73,23 +73,33 @@ export const enum SyntaxKind {
JSReturnKeyword = 88,
JSTryKeyword = 89,
JSCatchKeyword = 90,
JSBindPattern = 92,
JSConstantExpression = 94,
JSMemberExpression = 96,
JSCallExpression = 97,
JSBinaryExpression = 98,
JSUnaryExpression = 99,
JSNewExpression = 100,
JSSequenceExpression = 101,
JSConditionalExpression = 102,
JSReferenceExpression = 103,
JSExpressionStatement = 106,
JSConditionalStatement = 107,
JSParameter = 108,
JSFunctionDeclaration = 111,
JSArrowFunctionDeclaration = 112,
JSLetDeclaration = 113,
JSSourceFile = 114,
JSCloseBrace = 91,
JSCloseBracket = 92,
JSCloseParen = 93,
JSOpenBrace = 94,
JSOpenBracket = 95,
JSOpenParen = 96,
JSSemi = 97,
JSComma = 98,
JSDot = 99,
JSDotDotDot = 100,
JSBindPattern = 102,
JSConstantExpression = 104,
JSMemberExpression = 105,
JSCallExpression = 106,
JSBinaryExpression = 107,
JSUnaryExpression = 108,
JSNewExpression = 109,
JSSequenceExpression = 110,
JSConditionalExpression = 111,
JSReferenceExpression = 112,
JSExpressionStatement = 115,
JSConditionalStatement = 116,
JSParameter = 117,
JSFunctionDeclaration = 120,
JSArrowFunctionDeclaration = 121,
JSLetDeclaration = 122,
JSSourceFile = 123,
}
@ -586,6 +596,16 @@ export type JSToken
| JSReturnKeyword
| JSTryKeyword
| JSCatchKeyword
| JSCloseBrace
| JSCloseBracket
| JSCloseParen
| JSOpenBrace
| JSOpenBracket
| JSOpenParen
| JSSemi
| JSComma
| JSDot
| JSDotDotDot
export interface JSOperator extends SyntaxBase {
@ -610,6 +630,46 @@ export interface JSCatchKeyword extends SyntaxBase {
kind: SyntaxKind.JSCatchKeyword;
}
// Punctuation tokens of the embedded JavaScript language. Each interface
// carries nothing beyond SyntaxBase except the `kind` discriminant that
// identifies it in the JSToken / JSSyntax / Syntax unions.

/** Token node whose kind is SyntaxKind.JSCloseBrace. */
export interface JSCloseBrace extends SyntaxBase {
kind: SyntaxKind.JSCloseBrace;
}
/** Token node whose kind is SyntaxKind.JSCloseBracket. */
export interface JSCloseBracket extends SyntaxBase {
kind: SyntaxKind.JSCloseBracket;
}
/** Token node whose kind is SyntaxKind.JSCloseParen. */
export interface JSCloseParen extends SyntaxBase {
kind: SyntaxKind.JSCloseParen;
}
/** Token node whose kind is SyntaxKind.JSOpenBrace. */
export interface JSOpenBrace extends SyntaxBase {
kind: SyntaxKind.JSOpenBrace;
}
/** Token node whose kind is SyntaxKind.JSOpenBracket. */
export interface JSOpenBracket extends SyntaxBase {
kind: SyntaxKind.JSOpenBracket;
}
/** Token node whose kind is SyntaxKind.JSOpenParen. */
export interface JSOpenParen extends SyntaxBase {
kind: SyntaxKind.JSOpenParen;
}
/** Token node whose kind is SyntaxKind.JSSemi. */
export interface JSSemi extends SyntaxBase {
kind: SyntaxKind.JSSemi;
}
/** Token node whose kind is SyntaxKind.JSComma. */
export interface JSComma extends SyntaxBase {
kind: SyntaxKind.JSComma;
}
/** Token node whose kind is SyntaxKind.JSDot. */
export interface JSDot extends SyntaxBase {
kind: SyntaxKind.JSDot;
}
/** Token node whose kind is SyntaxKind.JSDotDotDot. */
export interface JSDotDotDot extends SyntaxBase {
kind: SyntaxKind.JSDotDotDot;
}
export type JSPattern
= JSBindPattern
@ -636,14 +696,10 @@ export interface JSConstantExpression extends SyntaxBase {
value: BoltValue;
}
export const enum JSMemberExpressionModifiers {
Computed = 1,}
export interface JSMemberExpression extends SyntaxBase {
kind: SyntaxKind.JSMemberExpression;
value: JSExpression;
property: JSExpression;
modifiers: JSMemberExpressionModifiers;
property: JSIdentifier;
}
export interface JSCallExpression extends SyntaxBase {
@ -830,6 +886,16 @@ export type JSSyntax
| JSReturnKeyword
| JSTryKeyword
| JSCatchKeyword
| JSCloseBrace
| JSCloseBracket
| JSCloseParen
| JSOpenBrace
| JSOpenBracket
| JSOpenParen
| JSSemi
| JSComma
| JSDot
| JSDotDotDot
| JSBindPattern
| JSConstantExpression
| JSMemberExpression
@ -923,6 +989,16 @@ export type Syntax
| JSReturnKeyword
| JSTryKeyword
| JSCatchKeyword
| JSCloseBrace
| JSCloseBracket
| JSCloseParen
| JSOpenBrace
| JSOpenBracket
| JSOpenParen
| JSSemi
| JSComma
| JSDot
| JSDotDotDot
| JSBindPattern
| JSConstantExpression
| JSMemberExpression
@ -1017,9 +1093,19 @@ export function createJSIdentifier(text: string, span?: TextSpan | null): JSIden
export function createJSReturnKeyword(span?: TextSpan | null): JSReturnKeyword;
export function createJSTryKeyword(span?: TextSpan | null): JSTryKeyword;
export function createJSCatchKeyword(span?: TextSpan | null): JSCatchKeyword;
export function createJSCloseBrace(span?: TextSpan | null): JSCloseBrace;
export function createJSCloseBracket(span?: TextSpan | null): JSCloseBracket;
export function createJSCloseParen(span?: TextSpan | null): JSCloseParen;
export function createJSOpenBrace(span?: TextSpan | null): JSOpenBrace;
export function createJSOpenBracket(span?: TextSpan | null): JSOpenBracket;
export function createJSOpenParen(span?: TextSpan | null): JSOpenParen;
export function createJSSemi(span?: TextSpan | null): JSSemi;
export function createJSComma(span?: TextSpan | null): JSComma;
export function createJSDot(span?: TextSpan | null): JSDot;
export function createJSDotDotDot(span?: TextSpan | null): JSDotDotDot;
export function createJSBindPattern(name: JSIdentifier, span?: TextSpan | null): JSBindPattern;
export function createJSConstantExpression(value: BoltValue, span?: TextSpan | null): JSConstantExpression;
export function createJSMemberExpression(value: JSExpression, property: JSExpression, modifiers: JSMemberExpressionModifiers, span?: TextSpan | null): JSMemberExpression;
export function createJSMemberExpression(value: JSExpression, property: JSIdentifier, span?: TextSpan | null): JSMemberExpression;
export function createJSCallExpression(operator: JSExpression, operands: JSExpression[], span?: TextSpan | null): JSCallExpression;
export function createJSBinaryExpression(left: JSExpression, operator: JSOperator, right: JSExpression, span?: TextSpan | null): JSBinaryExpression;
export function createJSUnaryExpression(operator: JSOperator, operand: JSExpression, span?: TextSpan | null): JSUnaryExpression;
@ -1121,6 +1207,16 @@ export function isJSIdentifier(value: any): value is JSIdentifier;
export function isJSReturnKeyword(value: any): value is JSReturnKeyword;
export function isJSTryKeyword(value: any): value is JSTryKeyword;
export function isJSCatchKeyword(value: any): value is JSCatchKeyword;
export function isJSCloseBrace(value: any): value is JSCloseBrace;
export function isJSCloseBracket(value: any): value is JSCloseBracket;
export function isJSCloseParen(value: any): value is JSCloseParen;
export function isJSOpenBrace(value: any): value is JSOpenBrace;
export function isJSOpenBracket(value: any): value is JSOpenBracket;
export function isJSOpenParen(value: any): value is JSOpenParen;
export function isJSSemi(value: any): value is JSSemi;
export function isJSComma(value: any): value is JSComma;
export function isJSDot(value: any): value is JSDot;
export function isJSDotDotDot(value: any): value is JSDotDotDot;
export function isJSPattern(value: any): value is JSPattern;
export function isJSBindPattern(value: any): value is JSBindPattern;
export function isJSExpression(value: any): value is JSExpression;

View file

@ -8,11 +8,10 @@ import {
kindToString,
BoltSyntax,
BoltSentence,
createBoltEOS,
createBoltRecordPattern,
createBoltExpressionPattern,
createBoltIdentifier,
createBoltReferenceTypeNode,
createBoltReferenceTypeExpression,
createBoltConstantExpression,
createBoltTuplePattern,
createBoltQualName,
@ -24,7 +23,7 @@ import {
createBoltSourceFile,
BoltPattern,
BoltSourceElement,
BoltReferenceTypeNode,
BoltReferenceTypeExpression,
createBoltRecordDeclaration,
createBoltRecordDeclarationField,
isBoltSourceElement,
@ -34,7 +33,8 @@ import {
import { TextSpan } from "./text"
import { TypeChecker } from "./checker"
import { Parser, ParseError } from "./parser"
import { ParseError } from "./util"
import { Parser } from "./parser"
import { Evaluator, TRUE, FALSE } from "./evaluator"
import { StreamWrapper, setOrigNodeRange, BoltTokenStream, createTokenStream } from "./util"
@ -43,9 +43,9 @@ interface Transformer {
transform: (node: BoltTokenStream) => BoltSyntax;
}
function createSimpleBoltReferenceTypeNode(text: string): BoltReferenceTypeNode {
function createSimpleBoltReferenceTypeExpression(text: string): BoltReferenceTypeExpression {
const ids = text.split('.').map(name => createBoltIdentifier(name))
return createBoltReferenceTypeNode(createBoltQualName(ids.slice(0, -1), ids[ids.length-1]), [])
return createBoltReferenceTypeExpression(createBoltQualName(ids.slice(0, -1), ids[ids.length-1]), [])
}
/// This is actually a hand-parsed version of the following:
@ -64,26 +64,26 @@ function createSimpleBoltReferenceTypeNode(text: string): BoltReferenceTypeNode
/// }
//const PATTERN_SYNTAX: BoltPattern =
// createBoltRecordPattern(
// createSimpleBoltReferenceTypeNode('Bolt.AST.Sentence'),
// createSimpleBoltReferenceTypeExpression('Bolt.AST.Sentence'),
// [
// createBoltRecordDeclarationField(
// createBoltIdentifier('elements'),
// createBoltTuplePattern([
// createBoltRecordPattern(
// createSimpleBoltReferenceTypeNode('Bolt.AST.Identifier'),
// createSimpleBoltReferenceTypeExpression('Bolt.AST.Identifier'),
// [{
// name: createBoltIdentifier('text'),
// pattern: createBoltConstantExpression('syntax')
// }]
// ),
// createBoltRecordPattern(
// createSimpleBoltReferenceTypeNode('Bolt.AST.Braced'),
// createSimpleBoltReferenceTypeExpression('Bolt.AST.Braced'),
// [{
// name: createBoltIdentifier('elements'),
// pattern: createBoltTuplePattern([
// createBoltTypePattern(createSimpleBoltReferenceTypeNode('Bolt.AST.Pattern'), createBoltBindPattern(createBoltIdentifier('pattern'))),
// createBoltTypePattern(createSimpleBoltReferenceTypeNode('Bolt.AST.RArrow'), createBoltBindPattern(createBoltIdentifier('_'))),
// createBoltTypePattern(createSimpleBoltReferenceTypeNode('Bolt.AST.Expr'), createBoltBindPattern(createBoltIdentifier('expression')))
// createBoltTypePattern(createSimpleBoltReferenceTypeExpression('Bolt.AST.Pattern'), createBoltBindPattern(createBoltIdentifier('pattern'))),
// createBoltTypePattern(createSimpleBoltReferenceTypeExpression('Bolt.AST.RArrow'), createBoltBindPattern(createBoltIdentifier('_'))),
// createBoltTypePattern(createSimpleBoltReferenceTypeExpression('Bolt.AST.Expr'), createBoltBindPattern(createBoltIdentifier('expression')))
// ])
// }]
// )

17
src/foreign/index.ts Normal file
View file

@ -0,0 +1,17 @@
import { TextFile, TextPos } from "../text"
import { JSScanner } from "./js/scanner"
import { JSParser } from "./js/parser"
/**
 * Parses the text of a foreign-language block with the scanner and parser
 * registered for that language.
 *
 * @param langName Name of the embedded language; only "JS" is recognised.
 * @param text Source text of the foreign block.
 * @param file File the text belongs to; handed to the scanner.
 * @param offset Position handed to the scanner as its starting position.
 * @returns The source elements produced by the language's parser.
 * @throws Error when no parser is known for `langName`.
 */
export function parseForeignLanguage(langName: string, text: string, file: TextFile, offset: TextPos) {
  if (langName === "JS") {
    const jsScanner = new JSScanner(file, text, offset);
    const jsParser = new JSParser();
    return jsParser.parseJSSourceElementList(jsScanner);
  }
  throw new Error(`Did not know how to parse a foreign language named ${langName}.`);
}

117
src/foreign/js/parser.ts Normal file
View file

@ -0,0 +1,117 @@
import { Stream, assertToken, setOrigNodeRange, ParseError } from "../../util"
import {
SyntaxKind,
JSToken,
JSStatement,
JSSourceElement,
JSExpressionStatement,
createJSExpressionStatement,
JSExpression,
JSReferenceExpression,
createJSReferenceExpression,
JSIdentifier,
JSMemberExpression,
createJSMemberExpression,
createJSCallExpression
} from "../../ast"
/** Token source consumed by JSParser: a Stream that yields JSToken values. */
export type JSTokenStream = Stream<JSToken>;
/**
 * Parser for the small JavaScript subset used in foreign blocks: identifier
 * references, member accesses (`a.b`) and call expressions (`f(a, b)`),
 * each forming an expression statement.
 */
export class JSParser {

  /**
   * Consumes one identifier token and turns it into a reference expression.
   *
   * @throws ParseError when the next token is not an identifier.
   */
  public parseJSReferenceExpression(tokens: JSTokenStream): JSReferenceExpression {
    const t0 = tokens.get();
    assertToken(t0, SyntaxKind.JSIdentifier);
    const result = createJSReferenceExpression((t0 as JSIdentifier).text);
    setOrigNodeRange(result, t0, t0);
    return result;
  }

  /**
   * Parses an expression that cannot be split further. Only identifier
   * references are supported.
   *
   * @throws ParseError when the next token does not start such an expression.
   */
  private parsePrimitiveJSExpression(tokens: JSTokenStream): JSExpression {
    const t0 = tokens.peek();
    if (t0.kind === SyntaxKind.JSIdentifier) {
      return this.parseJSReferenceExpression(tokens);
    } else {
      throw new ParseError(t0, [SyntaxKind.JSIdentifier]);
    }
  }

  /**
   * Parses the comma-separated arguments of a call whose '(' has already
   * been consumed, up to and including the matching ')'.
   *
   * @returns The parsed arguments and the consumed ')' token.
   * @throws ParseError when an argument is followed by neither ',' nor ')'.
   */
  private parseJSCallArguments(tokens: JSTokenStream): { args: JSExpression[], closeParen: JSToken } {
    const args: JSExpression[] = [];
    while (true) {
      const t0 = tokens.peek();
      if (t0.kind === SyntaxKind.JSCloseParen) {
        // Reached for an empty list and after a trailing comma. The ')' must
        // be consumed here, otherwise it is left behind for the caller.
        tokens.get();
        return { args, closeParen: t0 };
      }
      args.push(this.parseJSExpression(tokens));
      const t1 = tokens.get();
      if (t1.kind === SyntaxKind.JSCloseParen) {
        return { args, closeParen: t1 };
      }
      assertToken(t1, SyntaxKind.JSComma);
    }
  }

  /**
   * Parses a primitive expression followed by any number of `.name` member
   * accesses and `(...)` calls.
   *
   * Parsing stops, without consuming the token, at a closing brace, paren
   * or bracket, a semicolon, a comma or the end of the input.
   *
   * @throws ParseError on any other token, including '[' because computed
   *   member access is not supported.
   */
  public parseJSExpression(tokens: JSTokenStream): JSExpression {
    const firstToken = tokens.peek();
    let result = this.parsePrimitiveJSExpression(tokens);
    while (true) {
      const t1 = tokens.peek();
      if (t1.kind === SyntaxKind.JSCloseBrace
          || t1.kind === SyntaxKind.JSCloseParen
          || t1.kind === SyntaxKind.JSCloseBracket
          || t1.kind === SyntaxKind.JSSemi
          // A comma ends an argument; end-of-file ends the last statement
          // when it has no trailing semicolon.
          || t1.kind === SyntaxKind.JSComma
          || t1.kind === SyntaxKind.EndOfFile) {
        break;
      }
      if (t1.kind === SyntaxKind.JSDot) {
        tokens.get();
        const t2 = tokens.get();
        assertToken(t2, SyntaxKind.JSIdentifier);
        const oldResult = result;
        result = createJSMemberExpression(oldResult, t2 as JSIdentifier);
        setOrigNodeRange(result, oldResult, t2);
      } else if (t1.kind === SyntaxKind.JSOpenParen) {
        tokens.get();
        const { args, closeParen } = this.parseJSCallArguments(tokens);
        const oldResult = result;
        result = createJSCallExpression(oldResult, args);
        setOrigNodeRange(result, firstToken, closeParen);
      } else {
        // TODO: computed member access (`a[b]`). Until it is implemented a
        // '[' is reported here instead of being silently dropped.
        throw new ParseError(t1, [SyntaxKind.JSDot, SyntaxKind.JSOpenParen]);
      }
    }
    return result;
  }

  /** Parses one expression and wraps it in an expression statement. */
  public parseJSExpressionStatement(tokens: JSTokenStream): JSExpressionStatement {
    const expr = this.parseJSExpression(tokens);
    const result = createJSExpressionStatement(expr);
    setOrigNodeRange(result, expr, expr);
    return result;
  }

  /** Parses one statement; expression statements are the only kind supported. */
  public parseJSStatement(tokens: JSTokenStream): JSStatement {
    return this.parseJSExpressionStatement(tokens);
  }

  /**
   * Parses statements until the end-of-file token is reached, skipping any
   * semicolons found between them.
   */
  public parseJSSourceElementList(tokens: JSTokenStream): JSSourceElement[] {
    const elements: JSSourceElement[] = [];
    while (true) {
      const t0 = tokens.peek();
      if (t0.kind === SyntaxKind.EndOfFile) {
        break;
      }
      if (t0.kind === SyntaxKind.JSSemi) {
        tokens.get();
        continue;
      }
      const statement = this.parseJSStatement(tokens)
      elements.push(statement);
    }
    return elements;
  }

}

272
src/foreign/js/scanner.ts Normal file
View file

@ -0,0 +1,272 @@
import XRegExp from "xregexp"
import { TextPos, TextSpan, TextFile } from "../../text"
import { EOF, ScanError } from "../../util"
import {
JSToken,
createJSIdentifier,
createJSDot,
createJSDotDotDot,
createJSOpenBracket,
createJSCloseBracket,
createJSCloseParen,
createJSOpenParen,
createJSOpenBrace,
createJSCloseBrace,
createJSSemi,
createJSComma,
createEndOfFile,
} from "../../ast"
/**
 * Tells whether `ch` contains a JavaScript white-space character: tab,
 * vertical tab, form feed, space, no-break space, byte-order mark or any
 * character of the Unicode "space separator" (Zs) category.
 *
 * Line terminators are not white space; see isLineTerminator.
 */
function isWhiteSpace(ch: string): boolean {
  // The `u` flag is essential: without it `\p{Zs}` is not a property escape
  // and the class matches the literal characters 'p', '{', 'Z', 's' and '}'.
  return /[\u0009\u000B\u000C\u0020\u00A0\uFEFF\p{Zs}]/u.test(ch)
}
/**
 * Tells whether `ch` is exactly one of the four line terminator characters:
 * line feed, carriage return, line separator or paragraph separator.
 */
function isLineTerminator(ch: string): boolean {
  switch (ch) {
    case '\u000A':
    case '\u000D':
    case '\u2028':
    case '\u2029':
      return true;
    default:
      return false;
  }
}
/**
 * Tells whether `ch` contains a character that may begin an identifier: a
 * Unicode ID_Start character, '$', '_' or a backslash.
 */
function isIdentStart(ch: string): boolean {
  const identStartPattern = /[\p{ID_Start}$_\\]/u;
  return identStartPattern.exec(ch) !== null;
}
/**
 * Tells whether `ch` contains a character that may continue an identifier:
 * a Unicode ID_Continue character, '$', a backslash, or one of the
 * zero-width (non-)joiners U+200C / U+200D.
 */
function isIdentPart(ch: string): boolean {
  const identPartPattern = /[\u200C\u200D\p{ID_Continue}$\\]/u;
  return identPartPattern.exec(ch) !== null;
}
/**
 * Scanner that turns JavaScript source text into JSToken values.
 *
 * It recognises identifiers, the punctuation `( ) [ ] { } , ;`, `.` and
 * `...`, and skips white space, line terminators and comments. Tokens are
 * produced lazily through scan(), peek() and get().
 */
export class JSScanner {

  /** Characters read ahead by peekChar() that getChar() has not consumed yet. */
  private buffer: string[] = [];

  /** Tokens scanned ahead by peek() that get() has not handed out yet. */
  private scanned: JSToken[] = [];

  /** Index into `input` of the next character readChar() returns. */
  private offset = 0;

  /**
   * @param file File the input belongs to; stored in token spans and errors.
   * @param input Text to scan.
   * @param currPos Position of the first character of `input`. It is copied,
   *   so the object passed in is never modified.
   */
  constructor(
    private file: TextFile,
    private input: string,
    private currPos: TextPos = new TextPos(0,1,1),
  ) {
    // getChar() advances currPos in place. Work on a private copy so the
    // caller's TextPos (for instance the start of another token's span) is
    // not changed behind its back.
    this.currPos = currPos.clone();
  }

  /** Reads the next raw character of the input, or EOF when it is exhausted. */
  protected readChar() {
    if (this.offset === this.input.length) {
      return EOF
    }
    return this.input[this.offset++]
  }

  /** Returns the `count`-th upcoming character (1-based) without consuming it. */
  protected peekChar(count = 1) {
    while (this.buffer.length < count) {
      this.buffer.push(this.readChar());
    }
    return this.buffer[count - 1];
  }

  /**
   * Consumes and returns the next character, advancing the current position.
   * At the end of the input EOF is returned and the position is left alone.
   */
  protected getChar() {
    const ch = this.buffer.length > 0
      ? this.buffer.shift()!
      : this.readChar()
    if (ch === EOF) {
      return EOF
    }
    if (isLineTerminator(ch)) {
      this.currPos.line += 1;
      this.currPos.column = 1;
    } else {
      this.currPos.column += 1;
    }
    this.currPos.offset += 1;
    return ch
  }

  /**
   * Consumes one character and checks that it is `expected`.
   *
   * @throws ScanError when a different character (or EOF) was found.
   */
  private assertChar(expected: string) {
    const actual = this.getChar();
    if (actual !== expected) {
      throw new ScanError(this.file, this.currPos.clone(), actual);
    }
  }

  /**
   * Consumes a `//` comment up to and including its line terminator (or up
   * to the end of the input) and returns the text after the slashes.
   */
  private scanLineComment(): string {
    let text = '';
    this.assertChar('/');
    this.assertChar('/')
    while (true) {
      const c2 = this.peekChar();
      if (isLineTerminator(c2)) {
        this.getChar();
        if (this.peekChar() === '\r') {
          this.getChar();
        }
        break;
      }
      if (c2 === EOF) {
        break;
      }
      text += this.getChar();
    }
    return text;
  }

  /**
   * Consumes a block comment, delimiters included, and returns the text
   * between the delimiters.
   *
   * @throws ScanError when the input ends before the comment is closed.
   */
  private scanMultiLineComment(): string {
    let text = '';
    // Consume the opening delimiter first so that it can never be taken for
    // part of the closing one, as would happen in '/*/'.
    this.assertChar('/');
    this.assertChar('*');
    while (true) {
      const c2 = this.getChar();
      if (c2 === EOF) {
        throw new ScanError(this.file, this.currPos.clone(), c2);
      }
      // Only peek at what follows a '*': in '**/' the second star must
      // still be able to start the terminator.
      if (c2 === '*' && this.peekChar() === '/') {
        this.getChar();
        break;
      }
      text += c2;
    }
    return text;
  }

  /** Skips white space, line terminators and comments before the next token. */
  private skipComments() {
    while (true) {
      const c0 = this.peekChar();
      if (c0 === '/') {
        const c1 = this.peekChar(2);
        if (c1 === '/') {
          this.scanLineComment();
        } else if (c1 === '*') {
          this.scanMultiLineComment();
        } else {
          break;
        }
      } else if (isWhiteSpace(c0) || isLineTerminator(c0)) {
        this.getChar();
      } else {
        break;
      }
    }
  }

  /**
   * Consumes one hexadecimal digit (either case) and returns its value.
   *
   * @throws ScanError when the next character is not a hexadecimal digit.
   */
  private scanHexDigit(): number {
    const startPos = this.currPos.clone();
    const c0 = this.getChar();
    if (!/^[0-9A-Fa-f]$/.test(c0)) {
      throw new ScanError(this.file, startPos, c0);
    }
    return parseInt(c0, 16);
  }

  /** Placeholder: unicode escapes inside identifiers are not supported yet. */
  private scanUnicodeEscapeSequence(): never {
    throw new Error(`Scanning unicode escape sequences is not yet implemented.`);
  }

  /**
   * Scans the next token directly from the input, bypassing the tokens
   * buffered by peek().
   *
   * @returns The next token; an end-of-file token once the input is used up.
   * @throws ScanError on a character that cannot start a token, and on a
   *   run of dots that is neither `.` nor `...`.
   */
  public scan(): JSToken {

    this.skipComments();

    const c0 = this.peekChar();

    if (c0 === EOF) {
      return createEndOfFile(new TextSpan(this.file, this.currPos.clone(), this.currPos.clone()))
    }

    const startPos = this.currPos.clone();

    if (/[,;()\[\]{}]/.test(c0)) {
      this.getChar();
      const span = new TextSpan(this.file, startPos, this.currPos.clone());
      switch (c0) {
        case '(': return createJSOpenParen(span);
        case ')': return createJSCloseParen(span);
        case '[': return createJSOpenBracket(span);
        case ']': return createJSCloseBracket(span);
        case '{': return createJSOpenBrace(span);
        case '}': return createJSCloseBrace(span);
        case ',': return createJSComma(span);
        case ';': return createJSSemi(span);
      }
    }

    // Count a run of consecutive dots; only one or three of them form a token.
    let i = 0;
    let ch = c0;
    while (ch === '.') {
      this.getChar();
      ch = this.peekChar();
      i++;
    }
    if (i > 0) {
      if (i === 1) {
        return createJSDot(new TextSpan(this.file, startPos, this.currPos.clone()));
      } else if (i === 3) {
        return createJSDotDotDot(new TextSpan(this.file, startPos, this.currPos.clone()));
      } else {
        throw new ScanError(this.file, startPos, c0);
      }
    }

    if (isIdentStart(c0)) {
      let name = '';
      while (true) {
        const c1 = this.peekChar();
        if (!isIdentPart(c1)) {
          break;
        }
        if (c1 === '\\') {
          name += this.scanUnicodeEscapeSequence();
        } else {
          name += this.getChar();
        }
      }
      const endPos = this.currPos.clone();
      return createJSIdentifier(name, new TextSpan(this.file, startPos, endPos))
    } else {
      throw new ScanError(this.file, startPos, c0);
    }

  }

  /** Returns the `count`-th upcoming token (1-based) without consuming it. */
  public peek(count = 1): JSToken {
    while (this.scanned.length < count) {
      this.scanned.push(this.scan());
    }
    return this.scanned[count - 1];
  }

  /** Consumes and returns the next token. */
  public get(): JSToken {
    return this.scanned.length > 0
      ? this.scanned.shift()!
      : this.scan();
  }

}

View file

@ -1,5 +1,3 @@
)
import * as acorn from "acorn"
import {
SyntaxKind,
@ -48,120 +46,26 @@ import {
createBoltFunctionDeclaration,
createBoltCallExpression,
BoltSymbol,
JSSourceElement,
JSStatement,
BoltTypeParameter,
createBoltTypePattern,
createBoltTypeParameter,
} from "./ast"
import { Scanner } from "./scanner"
import { parseForeignLanguage } from "./foreign"
import { Stream, setOrigNodeRange, createTokenStream, uniq, FastStringMap } from "./util"
import {
Stream,
OperatorKind,
OperatorTable,
assertToken,
ParseError,
setOrigNodeRange,
createTokenStream,
uniq,
} from "./util"
export type BoltTokenStream = Stream<BoltToken>;
export type JSTokenStream = Stream<JSToken>;
function describeKind(kind: SyntaxKind): string {
switch (kind) {
case SyntaxKind.BoltIdentifier:
return "an identifier"
case SyntaxKind.BoltOperator:
return "an operator"
case SyntaxKind.BoltStringLiteral:
return "a string"
case SyntaxKind.BoltIntegerLiteral:
return "an integer"
case SyntaxKind.BoltFnKeyword:
return "'fn'"
case SyntaxKind.BoltForeignKeyword:
return "'foreign'"
case SyntaxKind.BoltMatchKeyword:
return "'match'";
case SyntaxKind.BoltYieldKeyword:
return "'yield'";
case SyntaxKind.BoltReturnKeyword:
return "'return'";
case SyntaxKind.BoltPubKeyword:
return "'pub'"
case SyntaxKind.BoltLetKeyword:
return "'let'"
case SyntaxKind.BoltSemi:
return "';'"
case SyntaxKind.BoltColon:
return "':'"
case SyntaxKind.BoltDot:
return "'.'"
case SyntaxKind.BoltRArrow:
return "'->'"
case SyntaxKind.BoltComma:
return "','"
case SyntaxKind.BoltModKeyword:
return "'mod'"
case SyntaxKind.BoltStructKeyword:
return "'struct'"
case SyntaxKind.BoltEnumKeyword:
return "'enum'"
case SyntaxKind.BoltTypeKeyword:
return "'type'";
case SyntaxKind.BoltBraced:
return "'{' .. '}'"
case SyntaxKind.BoltBracketed:
return "'[' .. ']'"
case SyntaxKind.BoltParenthesized:
return "'(' .. ')'"
case SyntaxKind.EndOfFile:
return "'}', ')', ']' or end-of-file"
case SyntaxKind.BoltLtSign:
return "'<'";
case SyntaxKind.BoltGtSign:
return "'<'";
case SyntaxKind.BoltEqSign:
return "'='";
default:
throw new Error(`failed to describe ${kindToString(kind)}`)
}
}
function enumerate(elements: string[]) {
if (elements.length === 1) {
return elements[0]
} else {
return elements.slice(0, elements.length-1).join(', ') + ' or ' + elements[elements.length-1]
}
}
export class ParseError extends Error {
constructor(public actual: BoltToken, public expected: SyntaxKind[]) {
super(`${actual.span!.file.origPath}:${actual.span!.start.line}:${actual.span!.start.column}: expected ${enumerate(expected.map(e => describeKind(e)))} but got ${describeKind(actual.kind)}`)
}
}
enum OperatorKind {
Prefix,
InfixL,
InfixR,
Suffix,
}
function isRightAssoc(kind: OperatorKind) {
return kind === OperatorKind.InfixR;
}
interface OperatorInfo {
kind: OperatorKind;
arity: number;
name: string;
precedence: number;
}
function assertToken(node: BoltToken, kind: SyntaxKind) {
if (node.kind !== kind) {
throw new ParseError(node, [kind]);
}
}
const KIND_EXPRESSION_T0 = [
SyntaxKind.BoltStringLiteral,
SyntaxKind.BoltIntegerLiteral,
@ -198,34 +102,6 @@ const KIND_SOURCEELEMENT_T0 = uniq([
...KIND_DECLARATION_T0,
])
type OperatorTableMatrix = [OperatorKind, number, string][][];
class OperatorTable {
private operatorsByName = new FastStringMap<string, OperatorInfo>();
//private operatorsByPrecedence = FastStringMap<number, OperatorInfo>();
constructor(definitions: OperatorTableMatrix) {
let i = 0;
for (const group of definitions) {
for (const [kind, arity, name] of group) {
const info = { kind, arity, name, precedence: i }
this.operatorsByName.set(name, info);
//this.operatorsByPrecedence[i] = info;
}
i++;
}
}
public lookup(name: string): OperatorInfo | null {
if (!this.operatorsByName.has(name)) {
return null;
}
return this.operatorsByName.get(name);
}
}
export class Parser {
exprOperatorTable = new OperatorTable([
@ -922,16 +798,13 @@ export class Parser {
case "Bolt":
body = this.parseStatements(tokens);
break;
case "JS":
const scanner = new Scanner(t3.span!.file, t3.text);
body = this.parseJSSourceElementList(scanner);
break;
default:
throw new Error(`Unrecognised language: ${target}`);
body = parseForeignLanguage(target, t3.text, t3.span!.file, t3.span!.start);
break;
}
}
const node = createBoltFunctionDeclaration(
const result = createBoltFunctionDeclaration(
modifiers,
target,
name,
@ -939,28 +812,11 @@ export class Parser {
returnType,
body
);
setOrigNodeRange(node, firstToken, lastToken!);
return node;
setOrigNodeRange(result, firstToken, lastToken!);
return result;
}
//public parseModuleDeclaration(tokens: BoltTokenStream): BoltModule {
//let modifiers = 0;
//let t0 = tokens.get();
//if (t0.kind === SyntaxKind.BoltPubKeyword) {
//modifiers |= BoltDeclarationModifiers.Public;
//t0 = tokens.get();
//}
//assertToken(t0, SyntaxKind.BoltModKeyword);
//const name = this.parseQualName(tokens);
//const t1 = tokens.get();
//assertToken(t1, SyntaxKind.BoltBraced);
//const elements = this.parseSourceElementList(createTokenStream(t1));
//const node = createBoltModule(modifiers, name, elements);
//setOrigNodeRange(node, t0, t1);
//return node;
//}
public parseDeclaration(tokens: BoltTokenStream): BoltDeclaration {
let t0 = tokens.peek(1);
let i = 1;
@ -1091,22 +947,5 @@ export class Parser {
}
public parseJSStatement(tokens: JSTokenStream): JSStatement {
return this.parseJSExpressionStatement(tokens);
}
public parseJSSourceElementList(tokens: JSTokenStream): JSSourceElement[] {
const elements: JSSourceElement[] = [];
while (true) {
const t0 = tokens.peek();
if (t0.kind === SyntaxKind.EndOfFile) {
break;
}
const statement = this.parseJSStatement(tokens)
elements.push(statement);
}
return elements;
}
}

View file

@ -1,6 +1,8 @@
import XRegExp from "xregexp"
import { EOF, ScanError } from "./util"
import {
TextFile,
TextPos,
@ -53,29 +55,6 @@ export enum PunctType {
Brace,
}
function escapeChar(ch: string) {
switch (ch) {
case '\a': return '\\a';
case '\b': return '\\b';
case '\f': return '\\f';
case '\n': return '\\n';
case '\r': return '\\r';
case '\t': return '\\t';
case '\v': return '\\v';
case '\0': return '\\0';
case '\'': return '\\\'';
default:
const code = ch.charCodeAt(0);
if (code >= 0x20 && code <= 0x7E) {
return ch
} else if (code < 0x7F) {
return `\\x${code.toString(16).padStart(2, '0')}`
} else {
return `\\u${code.toString(16).padStart(4, '0')}`
}
}
}
function getPunctType(ch: string) {
switch (ch) {
case '(':
@ -114,11 +93,6 @@ function isOpenPunct(ch: string) {
}
}
class ScanError extends Error {
constructor(public file: TextFile, public position: TextPos, public char: string) {
super(`${file.origPath}:${position.line}:${position.column}: unexpected char '${escapeChar(char)}'`)
}
}
function isDigit(ch: string) {
return XRegExp('\\p{Nd}').test(ch)
@ -144,32 +118,6 @@ function isSymbol(ch: string) {
return /[=+\/\-*%$!><&^|]/.test(ch)
}
function isJSWhiteSpace(ch: string): boolean {
return ch === '\u0009'
|| ch === '\u000B'
|| ch === '\u000C'
|| ch === '\u0020'
|| ch === '\u00A0'
|| ch === '\u000B'
|| ch === '\uFEFF'
|| XRegExp('\\p{Zs}').test(ch)
}
function isJSIdentStart(ch: string): boolean {
return XRegExp('[\\p{ID_Start}$_\\]').test(ch)
}
function isJSIdentPart(ch: string): boolean {
return XRegExp('[\u200C\u200D\\p{ID_Continue}$\\]').test(ch)
}
//function isOperatorPart(ch: string) {
//return /[=+\-*\/%$!><]/.test(ch)
//}
const EOF = ''
export class Scanner {
protected buffer: string[] = [];
@ -449,187 +397,3 @@ export class Scanner {
}
export class JSScanner {
private buffer: string[] = [];
private scanned: JSToken[] = [];
private offset = 0;
constructor(
private file: TextFile,
private input: string,
private currPos: TextPos = new TextPos(0,1,1),
) {
}
protected readChar() {
if (this.offset == this.input.length) {
return EOF
}
return this.input[this.offset++]
}
protected peekChar(count = 1) {
while (this.buffer.length < count) {
this.buffer.push(this.readChar());
}
return this.buffer[count - 1];
}
protected getChar() {
const ch = this.buffer.length > 0
? this.buffer.shift()!
: this.readChar()
if (ch == EOF) {
return EOF
}
if (isNewLine(ch)) {
this.currPos.line += 1;
this.currPos.column = 1;
} else {
this.currPos.column += 1;
}
this.currPos.offset += 1;
return ch
}
private assertChar(expected: string) {
const actual = this.getChar();
if (actual !== expected) {
throw new ScanError(this.file, this.currPos.clone(), actual);
}
}
private scanLineComment(): string {
let text = '';
this.assertChar('/');
this.assertChar('/')
while (true) {
const c2 = this.peekChar();
if (c2 === '\n') {
this.getChar();
if (this.peekChar() === '\r') {
this.getChar();
}
break;
}
if (c2 === EOF) {
break;
}
text += this.getChar();
}
return text;
}
private scanMultiLineComment(): string {
let text = '';
while (true) {
const c2 = this.getChar();
if (c2 === '*') {
const c3 = this.getChar();
if (c3 === '/') {
break;
}
text += c2 + c3;
} else if (c2 === EOF) {
throw new ScanError(this.file, this.currPos.clone(), c2);
} else {
text += c2;
}
}
return text;
}
private skipComments() {
while (true) {
const c0 = this.peekChar();
if (c0 === '/') {
const c1 = this.peekChar(2);
if (c1 == '/') {
this.scanLineComment();
} else if (c1 === '*') {
this.scanMultiLineComment();
} else {
break;
}
} else if (isWhiteSpace(c0)) {
this.getChar();
} else {
break;
}
}
}
private scanHexDigit(): number {
const startPos = this.currPos.clone();
const c0 = this.getChar();
switch (c0.toLowerCase()) {
case '0': return 0;
case '1': return 1;
case '2': return 2;
case '3': return 3;
case '4': return 4;
case '5': return 5;
case '6': return 6;
case '7': return 7;
case '8': return 8;
case '9': return 0;
case 'a': return 10;
case 'b': return 11;
case 'c': return 12;
case 'd': return 13;
case 'e': return 14;
case 'f': return 15;
default:
throw new ScanError(this.file, startPos, c0);
}
}
private scanUnicodeEscapeSequence() {
throw new Error(`Scanning unicode escape sequences is not yet implemented.`);
}
public scan(): JSToken {
this.skipComments();
const c0 = this.peekChar();
const startPos = this.currPos.clone();
if (isJSIdentStart(c0)) {
let name = '';
while (true) {
const c0 = this.peekChar();
if (!isJSIdentPart(c0)) {
break;
}
if (c0 === '\\') {
name += this.scanUnicodeEscapeSequence();
} else {
name += this.getChar();
}
}
const endPos = this.currPos.clone();
return createJSIdentifier(name, new TextSpan(this.file, startPos, endPos))
} else {
throw new ScanError(this.file, this.currPos.clone(), c0);
}
}
public peek(count = 1): JSToken {
while (this.scanned.length < count) {
this.scanned.push(this.scan());
}
return this.scanned[count - 1];
}
public get(): JSToken {
return this.scanned.length > 0
? this.scanned.shift()!
: this.scan();
}
}

View file

@ -4,7 +4,7 @@ import * as fs from "fs"
import moment from "moment"
import chalk from "chalk"
import { TextSpan, TextPos } from "./text"
import { TextFile, TextSpan, TextPos } from "./text"
import { Scanner } from "./scanner"
import { kindToString, Syntax, BoltQualName, BoltDeclaration, BoltDeclarationModifiers, createEndOfFile, SyntaxKind, isBoltPunctuated } from "./ast"
@ -220,3 +220,183 @@ export function getFileStem(filepath: string): string {
return path.basename(filepath).split('.')[0];
}
/**
 * Returns the human-readable description of a syntax kind that is used in
 * parse error messages, e.g. "an identifier" or "';'".
 *
 * @param kind The kind to describe.
 * @throws Error when no description is registered for `kind`.
 */
export function describeKind(kind: SyntaxKind): string {
  switch (kind) {
    case SyntaxKind.JSIdentifier:
    case SyntaxKind.BoltIdentifier:
      return "an identifier"
    case SyntaxKind.BoltOperator:
      return "an operator"
    case SyntaxKind.BoltStringLiteral:
      return "a string"
    case SyntaxKind.BoltIntegerLiteral:
      return "an integer"
    case SyntaxKind.BoltFnKeyword:
      return "'fn'"
    case SyntaxKind.BoltForeignKeyword:
      return "'foreign'"
    case SyntaxKind.BoltMatchKeyword:
      return "'match'";
    case SyntaxKind.BoltYieldKeyword:
      return "'yield'";
    case SyntaxKind.BoltReturnKeyword:
      return "'return'";
    case SyntaxKind.BoltPubKeyword:
      return "'pub'"
    case SyntaxKind.BoltLetKeyword:
      return "'let'"
    case SyntaxKind.BoltSemi:
      return "';'"
    case SyntaxKind.BoltColon:
      return "':'"
    case SyntaxKind.BoltDot:
      return "'.'"
    case SyntaxKind.JSDot:
      return "'.'"
    case SyntaxKind.JSDotDotDot:
      return "'...'"
    case SyntaxKind.BoltRArrow:
      return "'->'"
    case SyntaxKind.BoltComma:
      return "','"
    case SyntaxKind.BoltModKeyword:
      return "'mod'"
    case SyntaxKind.BoltStructKeyword:
      return "'struct'"
    case SyntaxKind.BoltEnumKeyword:
      return "'enum'"
    case SyntaxKind.BoltTypeKeyword:
      return "'type'";
    case SyntaxKind.BoltBraced:
      return "'{' .. '}'"
    case SyntaxKind.BoltBracketed:
      return "'[' .. ']'"
    case SyntaxKind.BoltParenthesized:
      return "'(' .. ')'"
    case SyntaxKind.EndOfFile:
      return "'}', ')', ']' or end-of-file"
    case SyntaxKind.BoltLtSign:
      return "'<'";
    case SyntaxKind.BoltGtSign:
      // Used to be reported as '<', which made the message contradict the input.
      return "'>'";
    case SyntaxKind.BoltEqSign:
      return "'='";
    case SyntaxKind.JSOpenBrace:
      return "'{'";
    case SyntaxKind.JSCloseBrace:
      return "'}'";
    case SyntaxKind.JSOpenBracket:
      return "'['";
    case SyntaxKind.JSCloseBracket:
      return "']'";
    case SyntaxKind.JSOpenParen:
      return "'('";
    case SyntaxKind.JSCloseParen:
      return "')'";
    case SyntaxKind.JSSemi:
      return "';'";
    case SyntaxKind.JSComma:
      return "','";
    default:
      throw new Error(`failed to describe ${kindToString(kind)}`)
  }
}
/**
 * Joins descriptions into an English alternative list: a single element is
 * returned as is, otherwise the elements are separated by ", " with " or "
 * before the last one ("a, b or c").
 */
function enumerate(elements: string[]) {
  if (elements.length === 1) {
    return elements[0]
  }
  const lastIndex = elements.length - 1;
  const leading = elements.slice(0, lastIndex).join(', ');
  return leading + ' or ' + elements[lastIndex]
}
/**
 * Error raised when a parser meets a node of a kind it did not expect.
 *
 * The message has the form
 * `<file>:<line>:<column>: expected <alternatives> but got <description>`,
 * with the location taken from the start of `actual`'s span.
 */
export class ParseError extends Error {
/**
 * @param actual The node that was found. Its `span` is dereferenced with a
 *   non-null assertion, so it has to be set.
 * @param expected The kinds that would have been accepted instead.
 *
 * Both `actual.kind` and every expected kind are passed to describeKind(),
 * which throws for kinds it has no description for.
 */
constructor(public actual: Syntax, public expected: SyntaxKind[]) {
super(`${actual.span!.file.origPath}:${actual.span!.start.line}:${actual.span!.start.column}: expected ${enumerate(expected.map(e => describeKind(e)))} but got ${describeKind(actual.kind)}`)
}
}
/**
 * Classification of an operator stored in an OperatorTable.
 *
 * NOTE(review): the member names suggest prefix, left-associative infix,
 * right-associative infix and suffix operators; only the InfixR case is
 * confirmed by code visible here (see isRightAssoc).
 */
export enum OperatorKind {
Prefix,
InfixL,
// The only kind isRightAssoc() reports as right-associative.
InfixR,
Suffix,
}
/** Tells whether operators of the given kind are right-associative, which holds for InfixR only. */
export function isRightAssoc(kind: OperatorKind) {
  switch (kind) {
    case OperatorKind.InfixR:
      return true;
    default:
      return false;
  }
}
/** Entry of an OperatorTable, as returned by OperatorTable.lookup(). */
export interface OperatorInfo {
// Classification of the operator.
kind: OperatorKind;
// Arity as given in the table definition.
arity: number;
// Name under which the operator is registered and looked up.
name: string;
// Zero-based index of the group of the table definition that contains the operator.
precedence: number;
}
/**
 * Checks that `node` is of the given kind.
 *
 * @throws ParseError naming `kind` as the expected kind when it is not.
 */
export function assertToken(node: Syntax, kind: SyntaxKind) {
  if (node.kind === kind) {
    return;
  }
  throw new ParseError(node, [kind]);
}
/**
 * Definition accepted by the OperatorTable constructor: a list of groups,
 * each a list of `[kind, arity, name]` tuples. The index of a group becomes
 * the precedence of the operators in it.
 */
type OperatorTableList = [OperatorKind, number, string][][];
/**
 * Table of operators indexed by name.
 */
export class OperatorTable {

  private operatorsByName = new FastStringMap<string, OperatorInfo>();

  /**
   * Registers every operator of `definitions`. All operators of one group
   * receive the index of that group as their precedence.
   */
  constructor(definitions: OperatorTableList) {
    for (let precedence = 0; precedence < definitions.length; precedence++) {
      for (const [kind, arity, name] of definitions[precedence]) {
        this.operatorsByName.set(name, { kind, arity, name, precedence });
      }
    }
  }

  /** Returns the entry registered under `name`, or null when there is none. */
  public lookup(name: string): OperatorInfo | null {
    return this.operatorsByName.has(name)
      ? this.operatorsByName.get(name)
      : null;
  }

}
/**
 * Sentinel "character" that marks the end of the input: the empty string,
 * which can never be confused with a real one-character read.
 */
export const EOF = ''
/**
 * Renders a character so that it can be shown inside single quotes in an
 * error message.
 *
 * Well-known control characters and the single quote become their usual
 * backslash escapes, other printable ASCII is returned unchanged, remaining
 * control characters become `\xNN` and everything else becomes `\uNNNN`.
 *
 * @param ch The character to render. The empty string (the EOF sentinel)
 *   is returned unchanged because there is nothing to escape.
 */
function escapeChar(ch: string) {
  if (ch.length === 0) {
    // charCodeAt(0) would be NaN here and produce a bogus '\u0NaN'.
    return ch
  }
  switch (ch) {
    // JavaScript has no '\a' escape ('\a' is simply 'a'), so the bell
    // character has to be written as a hex escape.
    case '\x07': return '\\a';
    case '\b': return '\\b';
    case '\f': return '\\f';
    case '\n': return '\\n';
    case '\r': return '\\r';
    case '\t': return '\\t';
    case '\v': return '\\v';
    case '\0': return '\\0';
    case '\'': return '\\\'';
    default: {
      const code = ch.charCodeAt(0);
      if (code >= 0x20 && code <= 0x7E) {
        return ch
      } else if (code < 0x20) {
        return `\\x${code.toString(16).padStart(2, '0')}`
      } else {
        return `\\u${code.toString(16).padStart(4, '0')}`
      }
    }
  }
}
/**
 * Error raised by a scanner on a character it cannot handle.
 *
 * The message has the form
 * `<file>:<line>:<column>: unexpected char '<char>'`, where the character is
 * rendered through escapeChar().
 */
export class ScanError extends Error {
/**
 * @param file File that was being scanned.
 * @param position Position reported in the message.
 * @param char The offending character; scanners also pass the EOF sentinel here.
 */
constructor(public file: TextFile, public position: TextPos, public char: string) {
super(`${file.origPath}:${position.line}:${position.column}: unexpected char '${escapeChar(char)}'`)
}
}