From d1ba2cb540b36756adc88f91930ba8975a364f57 Mon Sep 17 00:00:00 2001 From: Sam Vervaeck Date: Fri, 22 May 2020 21:29:14 +0200 Subject: [PATCH] Update code - Move foreign language interfaces to separate directories - Extend the JavaScript scanner and parser to be able to parse simple call expressions and member expressions - Fix multiple issues in expander.ts - Move shared scanning/parsing utilities to util.ts --- src/ast.d.ts | 142 ++++++++++++++++---- src/expander.ts | 24 ++-- src/foreign/index.ts | 17 +++ src/foreign/js/parser.ts | 117 ++++++++++++++++ src/foreign/js/scanner.ts | 272 ++++++++++++++++++++++++++++++++++++++ src/parser.ts | 193 +++------------------------ src/scanner.ts | 240 +-------------------------------- src/util.ts | 182 ++++++++++++++++++++++++- 8 files changed, 736 insertions(+), 451 deletions(-) create mode 100644 src/foreign/index.ts create mode 100644 src/foreign/js/parser.ts create mode 100644 src/foreign/js/scanner.ts diff --git a/src/ast.d.ts b/src/ast.d.ts index e56fe28e0..efd4072a6 100644 --- a/src/ast.d.ts +++ b/src/ast.d.ts @@ -73,23 +73,33 @@ export const enum SyntaxKind { JSReturnKeyword = 88, JSTryKeyword = 89, JSCatchKeyword = 90, - JSBindPattern = 92, - JSConstantExpression = 94, - JSMemberExpression = 96, - JSCallExpression = 97, - JSBinaryExpression = 98, - JSUnaryExpression = 99, - JSNewExpression = 100, - JSSequenceExpression = 101, - JSConditionalExpression = 102, - JSReferenceExpression = 103, - JSExpressionStatement = 106, - JSConditionalStatement = 107, - JSParameter = 108, - JSFunctionDeclaration = 111, - JSArrowFunctionDeclaration = 112, - JSLetDeclaration = 113, - JSSourceFile = 114, + JSCloseBrace = 91, + JSCloseBracket = 92, + JSCloseParen = 93, + JSOpenBrace = 94, + JSOpenBracket = 95, + JSOpenParen = 96, + JSSemi = 97, + JSComma = 98, + JSDot = 99, + JSDotDotDot = 100, + JSBindPattern = 102, + JSConstantExpression = 104, + JSMemberExpression = 105, + JSCallExpression = 106, + JSBinaryExpression = 107, 
JSUnaryExpression = 108, + JSNewExpression = 109, + JSSequenceExpression = 110, + JSConditionalExpression = 111, + JSReferenceExpression = 112, + JSExpressionStatement = 115, + JSConditionalStatement = 116, + JSParameter = 117, + JSFunctionDeclaration = 120, + JSArrowFunctionDeclaration = 121, + JSLetDeclaration = 122, + JSSourceFile = 123, } @@ -586,6 +596,16 @@ export type JSToken | JSReturnKeyword | JSTryKeyword | JSCatchKeyword + | JSCloseBrace + | JSCloseBracket + | JSCloseParen + | JSOpenBrace + | JSOpenBracket + | JSOpenParen + | JSSemi + | JSComma + | JSDot + | JSDotDotDot export interface JSOperator extends SyntaxBase { @@ -610,6 +630,46 @@ export interface JSCatchKeyword extends SyntaxBase { kind: SyntaxKind.JSCatchKeyword; } +export interface JSCloseBrace extends SyntaxBase { + kind: SyntaxKind.JSCloseBrace; +} + +export interface JSCloseBracket extends SyntaxBase { + kind: SyntaxKind.JSCloseBracket; +} + +export interface JSCloseParen extends SyntaxBase { + kind: SyntaxKind.JSCloseParen; +} + +export interface JSOpenBrace extends SyntaxBase { + kind: SyntaxKind.JSOpenBrace; +} + +export interface JSOpenBracket extends SyntaxBase { + kind: SyntaxKind.JSOpenBracket; +} + +export interface JSOpenParen extends SyntaxBase { + kind: SyntaxKind.JSOpenParen; +} + +export interface JSSemi extends SyntaxBase { + kind: SyntaxKind.JSSemi; +} + +export interface JSComma extends SyntaxBase { + kind: SyntaxKind.JSComma; +} + +export interface JSDot extends SyntaxBase { + kind: SyntaxKind.JSDot; +} + +export interface JSDotDotDot extends SyntaxBase { + kind: SyntaxKind.JSDotDotDot; +} + export type JSPattern = JSBindPattern @@ -636,14 +696,10 @@ export interface JSConstantExpression extends SyntaxBase { value: BoltValue; } -export const enum JSMemberExpressionModifiers { - Computed = 1,} - export interface JSMemberExpression extends SyntaxBase { kind: SyntaxKind.JSMemberExpression; value: JSExpression; - property: JSExpression; - modifiers: JSMemberExpressionModifiers; 
+ property: JSIdentifier; } export interface JSCallExpression extends SyntaxBase { @@ -830,6 +886,16 @@ export type JSSyntax | JSReturnKeyword | JSTryKeyword | JSCatchKeyword + | JSCloseBrace + | JSCloseBracket + | JSCloseParen + | JSOpenBrace + | JSOpenBracket + | JSOpenParen + | JSSemi + | JSComma + | JSDot + | JSDotDotDot | JSBindPattern | JSConstantExpression | JSMemberExpression @@ -923,6 +989,16 @@ export type Syntax | JSReturnKeyword | JSTryKeyword | JSCatchKeyword + | JSCloseBrace + | JSCloseBracket + | JSCloseParen + | JSOpenBrace + | JSOpenBracket + | JSOpenParen + | JSSemi + | JSComma + | JSDot + | JSDotDotDot | JSBindPattern | JSConstantExpression | JSMemberExpression @@ -1017,9 +1093,19 @@ export function createJSIdentifier(text: string, span?: TextSpan | null): JSIden export function createJSReturnKeyword(span?: TextSpan | null): JSReturnKeyword; export function createJSTryKeyword(span?: TextSpan | null): JSTryKeyword; export function createJSCatchKeyword(span?: TextSpan | null): JSCatchKeyword; +export function createJSCloseBrace(span?: TextSpan | null): JSCloseBrace; +export function createJSCloseBracket(span?: TextSpan | null): JSCloseBracket; +export function createJSCloseParen(span?: TextSpan | null): JSCloseParen; +export function createJSOpenBrace(span?: TextSpan | null): JSOpenBrace; +export function createJSOpenBracket(span?: TextSpan | null): JSOpenBracket; +export function createJSOpenParen(span?: TextSpan | null): JSOpenParen; +export function createJSSemi(span?: TextSpan | null): JSSemi; +export function createJSComma(span?: TextSpan | null): JSComma; +export function createJSDot(span?: TextSpan | null): JSDot; +export function createJSDotDotDot(span?: TextSpan | null): JSDotDotDot; export function createJSBindPattern(name: JSIdentifier, span?: TextSpan | null): JSBindPattern; export function createJSConstantExpression(value: BoltValue, span?: TextSpan | null): JSConstantExpression; -export function createJSMemberExpression(value: 
JSExpression, property: JSExpression, modifiers: JSMemberExpressionModifiers, span?: TextSpan | null): JSMemberExpression; +export function createJSMemberExpression(value: JSExpression, property: JSIdentifier, span?: TextSpan | null): JSMemberExpression; export function createJSCallExpression(operator: JSExpression, operands: JSExpression[], span?: TextSpan | null): JSCallExpression; export function createJSBinaryExpression(left: JSExpression, operator: JSOperator, right: JSExpression, span?: TextSpan | null): JSBinaryExpression; export function createJSUnaryExpression(operator: JSOperator, operand: JSExpression, span?: TextSpan | null): JSUnaryExpression; @@ -1121,6 +1207,16 @@ export function isJSIdentifier(value: any): value is JSIdentifier; export function isJSReturnKeyword(value: any): value is JSReturnKeyword; export function isJSTryKeyword(value: any): value is JSTryKeyword; export function isJSCatchKeyword(value: any): value is JSCatchKeyword; +export function isJSCloseBrace(value: any): value is JSCloseBrace; +export function isJSCloseBracket(value: any): value is JSCloseBracket; +export function isJSCloseParen(value: any): value is JSCloseParen; +export function isJSOpenBrace(value: any): value is JSOpenBrace; +export function isJSOpenBracket(value: any): value is JSOpenBracket; +export function isJSOpenParen(value: any): value is JSOpenParen; +export function isJSSemi(value: any): value is JSSemi; +export function isJSComma(value: any): value is JSComma; +export function isJSDot(value: any): value is JSDot; +export function isJSDotDotDot(value: any): value is JSDotDotDot; export function isJSPattern(value: any): value is JSPattern; export function isJSBindPattern(value: any): value is JSBindPattern; export function isJSExpression(value: any): value is JSExpression; diff --git a/src/expander.ts b/src/expander.ts index 327145e7e..215e2a160 100644 --- a/src/expander.ts +++ b/src/expander.ts @@ -8,11 +8,10 @@ import { kindToString, BoltSyntax, BoltSentence, 
- createBoltEOS, createBoltRecordPattern, createBoltExpressionPattern, createBoltIdentifier, - createBoltReferenceTypeNode, + createBoltReferenceTypeExpression, createBoltConstantExpression, createBoltTuplePattern, createBoltQualName, @@ -24,7 +23,7 @@ import { createBoltSourceFile, BoltPattern, BoltSourceElement, - BoltReferenceTypeNode, + BoltReferenceTypeExpression, createBoltRecordDeclaration, createBoltRecordDeclarationField, isBoltSourceElement, @@ -34,7 +33,8 @@ import { import { TextSpan } from "./text" import { TypeChecker } from "./checker" -import { Parser, ParseError } from "./parser" +import { ParseError } from "./util" +import { Parser } from "./parser" import { Evaluator, TRUE, FALSE } from "./evaluator" import { StreamWrapper, setOrigNodeRange, BoltTokenStream, createTokenStream } from "./util" @@ -43,9 +43,9 @@ interface Transformer { transform: (node: BoltTokenStream) => BoltSyntax; } -function createSimpleBoltReferenceTypeNode(text: string): BoltReferenceTypeNode { +function createSimpleBoltReferenceTypeExpression(text: string): BoltReferenceTypeExpression { const ids = text.split('.').map(name => createBoltIdentifier(name)) - return createBoltReferenceTypeNode(createBoltQualName(ids.slice(0, -1), ids[ids.length-1]), []) + return createBoltReferenceTypeExpression(createBoltQualName(ids.slice(0, -1), ids[ids.length-1]), []) } /// This is actually a hand-parsed version of the following: @@ -64,26 +64,26 @@ function createSimpleBoltReferenceTypeNode(text: string): BoltReferenceTypeNode /// } //const PATTERN_SYNTAX: BoltPattern = // createBoltRecordPattern( -// createSimpleBoltReferenceTypeNode('Bolt.AST.Sentence'), +// createSimpleBoltReferenceTypeExpression('Bolt.AST.Sentence'), // [ // createBoltRecordDeclarationField( // createBoltIdentifier('elements'), // createBoltTuplePattern([ // createBoltRecordPattern( -// createSimpleBoltReferenceTypeNode('Bolt.AST.Identifier'), +// createSimpleBoltReferenceTypeExpression('Bolt.AST.Identifier'), // [{ // 
name: createBoltIdentifier('text'), // pattern: createBoltConstantExpression('syntax') // }] // ), // createBoltRecordPattern( -// createSimpleBoltReferenceTypeNode('Bolt.AST.Braced'), +// createSimpleBoltReferenceTypeExpression('Bolt.AST.Braced'), // [{ // name: createBoltIdentifier('elements'), // pattern: createBoltTuplePattern([ -// createBoltTypePattern(createSimpleBoltReferenceTypeNode('Bolt.AST.Pattern'), createBoltBindPattern(createBoltIdentifier('pattern'))), -// createBoltTypePattern(createSimpleBoltReferenceTypeNode('Bolt.AST.RArrow'), createBoltBindPattern(createBoltIdentifier('_'))), -// createBoltTypePattern(createSimpleBoltReferenceTypeNode('Bolt.AST.Expr'), createBoltBindPattern(createBoltIdentifier('expression'))) +// createBoltTypePattern(createSimpleBoltReferenceTypeExpression('Bolt.AST.Pattern'), createBoltBindPattern(createBoltIdentifier('pattern'))), +// createBoltTypePattern(createSimpleBoltReferenceTypeExpression('Bolt.AST.RArrow'), createBoltBindPattern(createBoltIdentifier('_'))), +// createBoltTypePattern(createSimpleBoltReferenceTypeExpression('Bolt.AST.Expr'), createBoltBindPattern(createBoltIdentifier('expression'))) // ]) // }] // ) diff --git a/src/foreign/index.ts b/src/foreign/index.ts new file mode 100644 index 000000000..c8fa07b33 --- /dev/null +++ b/src/foreign/index.ts @@ -0,0 +1,17 @@ + +import { TextFile, TextPos } from "../text" + +import { JSScanner } from "./js/scanner" +import { JSParser } from "./js/parser" + +export function parseForeignLanguage(langName: string, text: string, file: TextFile, offset: TextPos) { + switch (langName) { + case "JS": + const scanner = new JSScanner(file, text, offset); + const parser = new JSParser(); + return parser.parseJSSourceElementList(scanner) + default: + throw new Error(`Did not know how to parse a foreign language named ${langName}.`); + } +} + diff --git a/src/foreign/js/parser.ts b/src/foreign/js/parser.ts new file mode 100644 index 000000000..474afb292 --- /dev/null +++ 
b/src/foreign/js/parser.ts @@ -0,0 +1,117 @@ + +import { Stream, assertToken, setOrigNodeRange, ParseError } from "../../util" + +import { + SyntaxKind, + JSToken, + JSStatement, + JSSourceElement, + JSExpressionStatement, + createJSExpressionStatement, + JSExpression, + JSReferenceExpression, + createJSReferenceExpression, + JSIdentifier, + JSMemberExpression, + createJSMemberExpression, + createJSCallExpression +} from "../../ast" + +export type JSTokenStream = Stream; + +export class JSParser { + + public parseJSReferenceExpression(tokens: JSTokenStream): JSReferenceExpression { + const t0 = tokens.get(); + assertToken(t0, SyntaxKind.JSIdentifier); + const result = createJSReferenceExpression((t0 as JSIdentifier).text); + setOrigNodeRange(result, t0, t0); + return result; + } + + private parsePrimitiveJSExpression(tokens: JSTokenStream): JSExpression { + const t0 = tokens.peek(); + if (t0.kind === SyntaxKind.JSIdentifier) { + return this.parseJSReferenceExpression(tokens); + } else { + throw new ParseError(t0, [SyntaxKind.JSIdentifier]); + } + } + + public parseJSExpression(tokens: JSTokenStream): JSExpression { + const firstToken = tokens.peek(); + let result = this.parsePrimitiveJSExpression(tokens); + while (true) { + const t1 = tokens.peek(); + if (t1.kind === SyntaxKind.JSCloseBrace || t1.kind === SyntaxKind.JSCloseParen || t1.kind === SyntaxKind.JSCloseBracket || t1.kind === SyntaxKind.JSSemi) { + break; + } + if (t1.kind === SyntaxKind.JSDot) { + tokens.get(); + const t2 = tokens.get(); + assertToken(t2, SyntaxKind.JSIdentifier); + const oldResult = result; + result = createJSMemberExpression(oldResult, t2 as JSIdentifier); + setOrigNodeRange(result, oldResult, t2); + } else if (t1.kind === SyntaxKind.JSOpenBracket) { + tokens.get(); + // TODO + } else if (t1.kind === SyntaxKind.JSOpenParen) { + tokens.get(); + let lastToken; + let args: JSExpression[] = []; + while (true) { + const t2 = tokens.peek(); + if (t2.kind === SyntaxKind.JSCloseParen) { + 
lastToken = t2; + break; + } + args.push(this.parseJSExpression(tokens)); + const t3 = tokens.get(); + if (t3.kind === SyntaxKind.JSCloseParen) { + lastToken = t3; + break; + } else { + assertToken(t3, SyntaxKind.JSComma); + } + } + const oldResult = result; + result = createJSCallExpression(oldResult, args); + setOrigNodeRange(result, firstToken, lastToken); + } else { + throw new ParseError(t1, [SyntaxKind.JSDot, SyntaxKind.JSOpenBracket]); + } + } + return result; + } + + public parseJSExpressionStatement(tokens: JSTokenStream): JSExpressionStatement { + const expr = this.parseJSExpression(tokens); + const result = createJSExpressionStatement(expr); + setOrigNodeRange(result, expr, expr); + return result; + } + + public parseJSStatement(tokens: JSTokenStream): JSStatement { + return this.parseJSExpressionStatement(tokens); + } + + public parseJSSourceElementList(tokens: JSTokenStream): JSSourceElement[] { + const elements: JSSourceElement[] = []; + while (true) { + const t0 = tokens.peek(); + if (t0.kind === SyntaxKind.EndOfFile) { + break; + } + if (t0.kind === SyntaxKind.JSSemi) { + tokens.get(); + continue; + } + const statement = this.parseJSStatement(tokens) + elements.push(statement); + } + return elements; + } + +} + diff --git a/src/foreign/js/scanner.ts b/src/foreign/js/scanner.ts new file mode 100644 index 000000000..0ff071971 --- /dev/null +++ b/src/foreign/js/scanner.ts @@ -0,0 +1,272 @@ + +import XRegExp from "xregexp" + +import { TextPos, TextSpan, TextFile } from "../../text" +import { EOF, ScanError } from "../../util" + +import { + JSToken, + createJSIdentifier, + createJSDot, + createJSDotDotDot, + createJSOpenBracket, + createJSCloseBracket, + createJSCloseParen, + createJSOpenParen, + createJSOpenBrace, + createJSCloseBrace, + createJSSemi, + createJSComma, + createEndOfFile, +} from "../../ast" + +function isWhiteSpace(ch: string): boolean { + return /[\u0009\u000B\u000C\u0020\u00A0\u000B\uFEFF\p{Zs}]/.test(ch) +} + +function 
isLineTerminator(ch: string): boolean { + return ch === '\u000A' + || ch === '\u000D' + || ch === '\u2028' + || ch === '\u2029';; +} + +function isIdentStart(ch: string): boolean { + return /[\p{ID_Start}$_\\]/u.test(ch) +} + +function isIdentPart(ch: string): boolean { + return /[\u200C\u200D\p{ID_Continue}$\\]/u.test(ch) +} + +export class JSScanner { + + private buffer: string[] = []; + private scanned: JSToken[] = []; + private offset = 0; + + constructor( + private file: TextFile, + private input: string, + private currPos: TextPos = new TextPos(0,1,1), + ) { + + } + + protected readChar() { + if (this.offset === this.input.length) { + return EOF + } + return this.input[this.offset++] + } + + protected peekChar(count = 1) { + while (this.buffer.length < count) { + this.buffer.push(this.readChar()); + } + return this.buffer[count - 1]; + } + + protected getChar() { + + const ch = this.buffer.length > 0 + ? this.buffer.shift()! + : this.readChar() + + if (ch == EOF) { + return EOF + } + + if (isLineTerminator(ch)) { + this.currPos.line += 1; + this.currPos.column = 1; + } else { + this.currPos.column += 1; + } + this.currPos.offset += 1; + + return ch + } + + private assertChar(expected: string) { + const actual = this.getChar(); + if (actual !== expected) { + throw new ScanError(this.file, this.currPos.clone(), actual); + } + } + + private scanLineComment(): string { + let text = ''; + this.assertChar('/'); + this.assertChar('/') + while (true) { + const c2 = this.peekChar(); + if (isLineTerminator(c2)) { + this.getChar(); + if (this.peekChar() === '\r') { + this.getChar(); + } + break; + } + if (c2 === EOF) { + break; + } + text += this.getChar(); + } + return text; + } + + private scanMultiLineComment(): string { + let text = ''; + while (true) { + const c2 = this.getChar(); + if (c2 === '*') { + const c3 = this.getChar(); + if (c3 === '/') { + break; + } + text += c2 + c3; + } else if (c2 === EOF) { + throw new ScanError(this.file, this.currPos.clone(), c2); 
+ } else { + text += c2; + } + } + return text; + } + + private skipComments() { + while (true) { + const c0 = this.peekChar(); + if (c0 === '/') { + const c1 = this.peekChar(2); + if (c1 == '/') { + this.scanLineComment(); + } else if (c1 === '*') { + this.scanMultiLineComment(); + } else { + break; + } + } else if (isWhiteSpace(c0) || isLineTerminator(c0)) { + this.getChar(); + } else { + break; + } + } + } + + private scanHexDigit(): number { + const startPos = this.currPos.clone(); + const c0 = this.getChar(); + switch (c0) { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3; + case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 0; + case 'A': return 10; + case 'B': return 11; + case 'C': return 12; + case 'D': return 13; + case 'E': return 14; + case 'F': return 15; + case 'a': return 10; + case 'b': return 11; + case 'c': return 12; + case 'd': return 13; + case 'e': return 14; + case 'f': return 15; + default: + throw new ScanError(this.file, startPos, c0); + } + } + + private scanUnicodeEscapeSequence() { + throw new Error(`Scanning unicode escape sequences is not yet implemented.`); + } + + public scan(): JSToken { + + this.skipComments(); + + const c0 = this.peekChar(); + + if (c0 === EOF) { + return createEndOfFile(new TextSpan(this.file, this.currPos.clone(), this.currPos.clone())) + } + + const startPos = this.currPos.clone(); + + if (/[,;()\[\]{}]/.test(c0)) { + this.getChar(); + const span = new TextSpan(this.file, startPos, this.currPos.clone()); + switch (c0) { + case '(': return createJSOpenParen(span); + case ')': return createJSCloseParen(span); + case '[': return createJSOpenBracket(span); + case ']': return createJSCloseBracket(span); + case '{': return createJSOpenBrace(span); + case '}': return createJSCloseBrace(span); + case ',': return createJSComma(span); + case ';': return createJSSemi(span); + } + } + + let i = 0; + let ch = 
c0; + while (ch === '.') { + this.getChar(); + ch = this.peekChar(); + i++; + } + + if (i > 0) { + if (i === 1) { + return createJSDot(new TextSpan(this.file, startPos, this.currPos.clone())); + } else if (i === 3) { + return createJSDotDotDot(new TextSpan(this.file, startPos, this.currPos.clone())); + } else { + throw new ScanError(this.file, startPos, c0); + } + } + + if (isIdentStart(c0)) { + let name = ''; + while (true) { + const c0 = this.peekChar(); + if (!isIdentPart(c0)) { + break; + } + if (c0 === '\\') { + name += this.scanUnicodeEscapeSequence(); + } else { + name += this.getChar(); + } + } + const endPos = this.currPos.clone(); + return createJSIdentifier(name, new TextSpan(this.file, startPos, endPos)) + } else { + throw new ScanError(this.file, startPos, c0); + } + } + + public peek(count = 1): JSToken { + while (this.scanned.length < count) { + this.scanned.push(this.scan()); + } + return this.scanned[count - 1]; + } + + public get(): JSToken { + return this.scanned.length > 0 + ? this.scanned.shift()! 
+ : this.scan(); + } + +} + diff --git a/src/parser.ts b/src/parser.ts index 0b86338a7..8c1005781 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,5 +1,3 @@ -) -import * as acorn from "acorn" import { SyntaxKind, @@ -48,120 +46,26 @@ import { createBoltFunctionDeclaration, createBoltCallExpression, BoltSymbol, - JSSourceElement, - JSStatement, BoltTypeParameter, createBoltTypePattern, createBoltTypeParameter, } from "./ast" -import { Scanner } from "./scanner" +import { parseForeignLanguage } from "./foreign" -import { Stream, setOrigNodeRange, createTokenStream, uniq, FastStringMap } from "./util" +import { + Stream, + OperatorKind, + OperatorTable, + assertToken, + ParseError, + setOrigNodeRange, + createTokenStream, + uniq, +} from "./util" export type BoltTokenStream = Stream; -export type JSTokenStream = Stream; - -function describeKind(kind: SyntaxKind): string { - switch (kind) { - case SyntaxKind.BoltIdentifier: - return "an identifier" - case SyntaxKind.BoltOperator: - return "an operator" - case SyntaxKind.BoltStringLiteral: - return "a string" - case SyntaxKind.BoltIntegerLiteral: - return "an integer" - case SyntaxKind.BoltFnKeyword: - return "'fn'" - case SyntaxKind.BoltForeignKeyword: - return "'foreign'" - case SyntaxKind.BoltMatchKeyword: - return "'match'"; - case SyntaxKind.BoltYieldKeyword: - return "'yield'"; - case SyntaxKind.BoltReturnKeyword: - return "'return'"; - case SyntaxKind.BoltPubKeyword: - return "'pub'" - case SyntaxKind.BoltLetKeyword: - return "'let'" - case SyntaxKind.BoltSemi: - return "';'" - case SyntaxKind.BoltColon: - return "':'" - case SyntaxKind.BoltDot: - return "'.'" - case SyntaxKind.BoltRArrow: - return "'->'" - case SyntaxKind.BoltComma: - return "','" - case SyntaxKind.BoltModKeyword: - return "'mod'" - case SyntaxKind.BoltStructKeyword: - return "'struct'" - case SyntaxKind.BoltEnumKeyword: - return "'enum'" - case SyntaxKind.BoltTypeKeyword: - return "'type'"; - case SyntaxKind.BoltBraced: - return "'{' .. 
'}'" - case SyntaxKind.BoltBracketed: - return "'[' .. ']'" - case SyntaxKind.BoltParenthesized: - return "'(' .. ')'" - case SyntaxKind.EndOfFile: - return "'}', ')', ']' or end-of-file" - case SyntaxKind.BoltLtSign: - return "'<'"; - case SyntaxKind.BoltGtSign: - return "'<'"; - case SyntaxKind.BoltEqSign: - return "'='"; - default: - throw new Error(`failed to describe ${kindToString(kind)}`) - } -} - -function enumerate(elements: string[]) { - if (elements.length === 1) { - return elements[0] - } else { - return elements.slice(0, elements.length-1).join(', ') + ' or ' + elements[elements.length-1] - } -} - -export class ParseError extends Error { - constructor(public actual: BoltToken, public expected: SyntaxKind[]) { - super(`${actual.span!.file.origPath}:${actual.span!.start.line}:${actual.span!.start.column}: expected ${enumerate(expected.map(e => describeKind(e)))} but got ${describeKind(actual.kind)}`) - } -} - -enum OperatorKind { - Prefix, - InfixL, - InfixR, - Suffix, -} - -function isRightAssoc(kind: OperatorKind) { - return kind === OperatorKind.InfixR; -} - -interface OperatorInfo { - kind: OperatorKind; - arity: number; - name: string; - precedence: number; -} - -function assertToken(node: BoltToken, kind: SyntaxKind) { - if (node.kind !== kind) { - throw new ParseError(node, [kind]); - } -} - const KIND_EXPRESSION_T0 = [ SyntaxKind.BoltStringLiteral, SyntaxKind.BoltIntegerLiteral, @@ -198,34 +102,6 @@ const KIND_SOURCEELEMENT_T0 = uniq([ ...KIND_DECLARATION_T0, ]) -type OperatorTableMatrix = [OperatorKind, number, string][][]; - -class OperatorTable { - - private operatorsByName = new FastStringMap(); - //private operatorsByPrecedence = FastStringMap(); - - constructor(definitions: OperatorTableMatrix) { - let i = 0; - for (const group of definitions) { - for (const [kind, arity, name] of group) { - const info = { kind, arity, name, precedence: i } - this.operatorsByName.set(name, info); - //this.operatorsByPrecedence[i] = info; - } - i++; - } - } 
- - public lookup(name: string): OperatorInfo | null { - if (!this.operatorsByName.has(name)) { - return null; - } - return this.operatorsByName.get(name); - } - -} - export class Parser { exprOperatorTable = new OperatorTable([ @@ -922,16 +798,13 @@ export class Parser { case "Bolt": body = this.parseStatements(tokens); break; - case "JS": - const scanner = new Scanner(t3.span!.file, t3.text); - body = this.parseJSSourceElementList(scanner); - break; default: - throw new Error(`Unrecognised language: ${target}`); + body = parseForeignLanguage(target, t3.text, t3.span!.file, t3.span!.start); + break; } } - const node = createBoltFunctionDeclaration( + const result = createBoltFunctionDeclaration( modifiers, target, name, @@ -939,28 +812,11 @@ export class Parser { returnType, body ); - setOrigNodeRange(node, firstToken, lastToken!); - return node; + setOrigNodeRange(result, firstToken, lastToken!); + return result; } - //public parseModuleDeclaration(tokens: BoltTokenStream): BoltModule { - //let modifiers = 0; - //let t0 = tokens.get(); - //if (t0.kind === SyntaxKind.BoltPubKeyword) { - //modifiers |= BoltDeclarationModifiers.Public; - //t0 = tokens.get(); - //} - //assertToken(t0, SyntaxKind.BoltModKeyword); - //const name = this.parseQualName(tokens); - //const t1 = tokens.get(); - //assertToken(t1, SyntaxKind.BoltBraced); - //const elements = this.parseSourceElementList(createTokenStream(t1)); - //const node = createBoltModule(modifiers, name, elements); - //setOrigNodeRange(node, t0, t1); - //return node; - //} - public parseDeclaration(tokens: BoltTokenStream): BoltDeclaration { let t0 = tokens.peek(1); let i = 1; @@ -1091,22 +947,5 @@ export class Parser { } - public parseJSStatement(tokens: JSTokenStream): JSStatement { - return this.parseJSExpressionStatement(tokens); - } - - public parseJSSourceElementList(tokens: JSTokenStream): JSSourceElement[] { - const elements: JSSourceElement[] = []; - while (true) { - const t0 = tokens.peek(); - if (t0.kind === 
SyntaxKind.EndOfFile) { - break; - } - const statement = this.parseJSStatement(tokens) - elements.push(statement); - } - return elements; - } - } diff --git a/src/scanner.ts b/src/scanner.ts index 9a43c4aa1..1b5ce8e6e 100644 --- a/src/scanner.ts +++ b/src/scanner.ts @@ -1,6 +1,8 @@ import XRegExp from "xregexp" +import { EOF, ScanError } from "./util" + import { TextFile, TextPos, @@ -53,29 +55,6 @@ export enum PunctType { Brace, } -function escapeChar(ch: string) { - switch (ch) { - case '\a': return '\\a'; - case '\b': return '\\b'; - case '\f': return '\\f'; - case '\n': return '\\n'; - case '\r': return '\\r'; - case '\t': return '\\t'; - case '\v': return '\\v'; - case '\0': return '\\0'; - case '\'': return '\\\''; - default: - const code = ch.charCodeAt(0); - if (code >= 0x20 && code <= 0x7E) { - return ch - } else if (code < 0x7F) { - return `\\x${code.toString(16).padStart(2, '0')}` - } else { - return `\\u${code.toString(16).padStart(4, '0')}` - } - } -} - function getPunctType(ch: string) { switch (ch) { case '(': @@ -114,11 +93,6 @@ function isOpenPunct(ch: string) { } } -class ScanError extends Error { - constructor(public file: TextFile, public position: TextPos, public char: string) { - super(`${file.origPath}:${position.line}:${position.column}: unexpected char '${escapeChar(char)}'`) - } -} function isDigit(ch: string) { return XRegExp('\\p{Nd}').test(ch) @@ -144,32 +118,6 @@ function isSymbol(ch: string) { return /[=+\/\-*%$!><&^|]/.test(ch) } - -function isJSWhiteSpace(ch: string): boolean { - return ch === '\u0009' - || ch === '\u000B' - || ch === '\u000C' - || ch === '\u0020' - || ch === '\u00A0' - || ch === '\u000B' - || ch === '\uFEFF' - || XRegExp('\\p{Zs}').test(ch) -} - -function isJSIdentStart(ch: string): boolean { - return XRegExp('[\\p{ID_Start}$_\\]').test(ch) -} - -function isJSIdentPart(ch: string): boolean { - return XRegExp('[\u200C\u200D\\p{ID_Continue}$\\]').test(ch) -} - -//function isOperatorPart(ch: string) { - //return 
/[=+\-*\/%$!><]/.test(ch) -//} - -const EOF = '' - export class Scanner { protected buffer: string[] = []; @@ -449,187 +397,3 @@ export class Scanner { } -export class JSScanner { - - private buffer: string[] = []; - private scanned: JSToken[] = []; - private offset = 0; - - constructor( - private file: TextFile, - private input: string, - private currPos: TextPos = new TextPos(0,1,1), - ) { - - } - - protected readChar() { - if (this.offset == this.input.length) { - return EOF - } - return this.input[this.offset++] - } - - protected peekChar(count = 1) { - while (this.buffer.length < count) { - this.buffer.push(this.readChar()); - } - return this.buffer[count - 1]; - } - - protected getChar() { - - const ch = this.buffer.length > 0 - ? this.buffer.shift()! - : this.readChar() - - if (ch == EOF) { - return EOF - } - - if (isNewLine(ch)) { - this.currPos.line += 1; - this.currPos.column = 1; - } else { - this.currPos.column += 1; - } - this.currPos.offset += 1; - - return ch - } - - private assertChar(expected: string) { - const actual = this.getChar(); - if (actual !== expected) { - throw new ScanError(this.file, this.currPos.clone(), actual); - } - } - - private scanLineComment(): string { - let text = ''; - this.assertChar('/'); - this.assertChar('/') - while (true) { - const c2 = this.peekChar(); - if (c2 === '\n') { - this.getChar(); - if (this.peekChar() === '\r') { - this.getChar(); - } - break; - } - if (c2 === EOF) { - break; - } - text += this.getChar(); - } - return text; - } - - private scanMultiLineComment(): string { - let text = ''; - while (true) { - const c2 = this.getChar(); - if (c2 === '*') { - const c3 = this.getChar(); - if (c3 === '/') { - break; - } - text += c2 + c3; - } else if (c2 === EOF) { - throw new ScanError(this.file, this.currPos.clone(), c2); - } else { - text += c2; - } - } - return text; - } - - private skipComments() { - while (true) { - const c0 = this.peekChar(); - if (c0 === '/') { - const c1 = this.peekChar(2); - if (c1 == 
'/') { - this.scanLineComment(); - } else if (c1 === '*') { - this.scanMultiLineComment(); - } else { - break; - } - } else if (isWhiteSpace(c0)) { - this.getChar(); - } else { - break; - } - } - } - - private scanHexDigit(): number { - const startPos = this.currPos.clone(); - const c0 = this.getChar(); - switch (c0.toLowerCase()) { - case '0': return 0; - case '1': return 1; - case '2': return 2; - case '3': return 3; - case '4': return 4; - case '5': return 5; - case '6': return 6; - case '7': return 7; - case '8': return 8; - case '9': return 0; - case 'a': return 10; - case 'b': return 11; - case 'c': return 12; - case 'd': return 13; - case 'e': return 14; - case 'f': return 15; - default: - throw new ScanError(this.file, startPos, c0); - } - } - - private scanUnicodeEscapeSequence() { - throw new Error(`Scanning unicode escape sequences is not yet implemented.`); - } - - public scan(): JSToken { - this.skipComments(); - const c0 = this.peekChar(); - const startPos = this.currPos.clone(); - if (isJSIdentStart(c0)) { - let name = ''; - while (true) { - const c0 = this.peekChar(); - if (!isJSIdentPart(c0)) { - break; - } - if (c0 === '\\') { - name += this.scanUnicodeEscapeSequence(); - } else { - name += this.getChar(); - } - } - const endPos = this.currPos.clone(); - return createJSIdentifier(name, new TextSpan(this.file, startPos, endPos)) - } else { - throw new ScanError(this.file, this.currPos.clone(), c0); - } - } - - public peek(count = 1): JSToken { - while (this.scanned.length < count) { - this.scanned.push(this.scan()); - } - return this.scanned[count - 1]; - } - - public get(): JSToken { - return this.scanned.length > 0 - ? this.scanned.shift()! 
- : this.scan(); - } - -} -
diff --git a/src/util.ts b/src/util.ts
index c9ec5902d..9af47600f 100644
--- a/src/util.ts
+++ b/src/util.ts
@@ -4,7 +4,7 @@ import * as fs from "fs"
 import moment from "moment"
 import chalk from "chalk"
 
-import { TextSpan, TextPos } from "./text"
+import { TextFile, TextSpan, TextPos } from "./text"
 import { Scanner } from "./scanner"
 import { kindToString, Syntax, BoltQualName, BoltDeclaration, BoltDeclarationModifiers, createEndOfFile, SyntaxKind, isBoltPunctuated } from "./ast"
 
@@ -220,3 +220,215 @@ export function getFileStem(filepath: string): string {
   return path.basename(filepath).split('.')[0];
 }
 
+/**
+ * Returns a short human-readable description of a syntax kind, suitable for
+ * diagnostics such as "expected ';' but got an identifier".
+ *
+ * @throws Error when `kind` has no description in the table below.
+ */
+export function describeKind(kind: SyntaxKind): string {
+  switch (kind) {
+    case SyntaxKind.JSIdentifier:
+    case SyntaxKind.BoltIdentifier:
+      return "an identifier"
+    case SyntaxKind.BoltOperator:
+      return "an operator"
+    case SyntaxKind.BoltStringLiteral:
+      return "a string"
+    case SyntaxKind.BoltIntegerLiteral:
+      return "an integer"
+    case SyntaxKind.BoltFnKeyword:
+      return "'fn'"
+    case SyntaxKind.BoltForeignKeyword:
+      return "'foreign'"
+    case SyntaxKind.BoltMatchKeyword:
+      return "'match'";
+    case SyntaxKind.BoltYieldKeyword:
+      return "'yield'";
+    case SyntaxKind.BoltReturnKeyword:
+      return "'return'";
+    case SyntaxKind.BoltPubKeyword:
+      return "'pub'"
+    case SyntaxKind.BoltLetKeyword:
+      return "'let'"
+    case SyntaxKind.BoltSemi:
+      return "';'"
+    case SyntaxKind.BoltColon:
+      return "':'"
+    case SyntaxKind.BoltDot:
+      return "'.'"
+    case SyntaxKind.JSDot:
+      return "'.'"
+    case SyntaxKind.JSDotDotDot:
+      return "'...'"
+    case SyntaxKind.BoltRArrow:
+      return "'->'"
+    case SyntaxKind.BoltComma:
+      return "','"
+    case SyntaxKind.BoltModKeyword:
+      return "'mod'"
+    case SyntaxKind.BoltStructKeyword:
+      return "'struct'"
+    case SyntaxKind.BoltEnumKeyword:
+      return "'enum'"
+    case SyntaxKind.BoltTypeKeyword:
+      return "'type'";
+    case SyntaxKind.BoltBraced:
+      return "'{' .. '}'"
+    case SyntaxKind.BoltBracketed:
+      return "'[' .. ']'"
+    case SyntaxKind.BoltParenthesized:
+      return "'(' .. ')'"
+    case SyntaxKind.EndOfFile:
+      return "'}', ')', ']' or end-of-file"
+    case SyntaxKind.BoltLtSign:
+      return "'<'";
+    case SyntaxKind.BoltGtSign:
+      return "'>'";
+    case SyntaxKind.BoltEqSign:
+      return "'='";
+    case SyntaxKind.JSOpenBrace:
+      return "'{'";
+    case SyntaxKind.JSCloseBrace:
+      return "'}'";
+    case SyntaxKind.JSOpenBracket:
+      return "'['";
+    case SyntaxKind.JSCloseBracket:
+      return "']'";
+    case SyntaxKind.JSOpenParen:
+      return "'('";
+    case SyntaxKind.JSCloseParen:
+      return "')'";
+    case SyntaxKind.JSSemi:
+      return "';'";
+    case SyntaxKind.JSComma:
+      return "','";
+    default:
+      throw new Error(`failed to describe ${kindToString(kind)}`)
+  }
+}
+
+/** Joins alternatives into an English list: "a", "a or b", "a, b or c". */
+function enumerate(elements: string[]) {
+  if (elements.length === 1) {
+    return elements[0]
+  } else {
+    return elements.slice(0, elements.length-1).join(', ') + ' or ' + elements[elements.length-1]
+  }
+}
+
+/**
+ * Thrown when the parser finds `actual` where one of the `expected` kinds was
+ * required. The message starts with the file, line and column of `actual`,
+ * so `actual.span` must be set.
+ */
+export class ParseError extends Error {
+  constructor(public actual: Syntax, public expected: SyntaxKind[]) {
+    super(`${actual.span!.file.origPath}:${actual.span!.start.line}:${actual.span!.start.column}: expected ${enumerate(expected.map(e => describeKind(e)))} but got ${describeKind(actual.kind)}`)
+  }
+}
+
+/** Operator fixity; `InfixR` is the kind `isRightAssoc` reports as right-associative. */
+export enum OperatorKind {
+  Prefix,
+  InfixL,
+  InfixR,
+  Suffix,
+}
+
+/** Returns true only for `OperatorKind.InfixR`. */
+export function isRightAssoc(kind: OperatorKind) {
+  return kind === OperatorKind.InfixR;
+}
+
+/** An operator definition together with the precedence assigned by `OperatorTable`. */
+export interface OperatorInfo {
+  kind: OperatorKind;
+  arity: number;
+  name: string;
+  precedence: number;
+}
+
+/** Throws a `ParseError` unless `node` is of the given `kind`. */
+export function assertToken(node: Syntax, kind: SyntaxKind) {
+  if (node.kind !== kind) {
+    throw new ParseError(node, [kind]);
+  }
+}
+
+
+/**
+ * Operator definitions as a list of groups; every entry is
+ * `[kind, arity, name]` and the index of its group becomes its precedence.
+ */
+type OperatorTableList = [OperatorKind, number, string][][];
+
+/** Name-indexed lookup table built from an `OperatorTableList`. */
+export class OperatorTable {
+
+  private operatorsByName = new FastStringMap();
+  //private operatorsByPrecedence = FastStringMap();
+
+  constructor(definitions: OperatorTableList) {
+    let i = 0;
+    for (const group of definitions) {
+      for (const [kind, arity, name] of group) {
+        const info = { kind, arity, name, precedence: i }
+        this.operatorsByName.set(name, info);
+        //this.operatorsByPrecedence[i] = info;
+      }
+      i++;
+    }
+  }
+
+  /** Returns the operator registered under `name`, or `null` if there is none. */
+  public lookup(name: string): OperatorInfo | null {
+    if (!this.operatorsByName.has(name)) {
+      return null;
+    }
+    return this.operatorsByName.get(name);
+  }
+
+}
+
+/** Pseudo-character that scanners use to signal the end of the input. */
+export const EOF = ''
+
+/**
+ * Renders a single character for use between single quotes in an error
+ * message: printable ASCII is kept as-is, everything else is escaped.
+ */
+function escapeChar(ch: string) {
+  switch (ch) {
+    // JavaScript has no '\a' escape ('\a' is just 'a'), so BEL is spelled '\x07'.
+    case '\x07': return '\\a';
+    case '\b': return '\\b';
+    case '\f': return '\\f';
+    case '\n': return '\\n';
+    case '\r': return '\\r';
+    case '\t': return '\\t';
+    case '\v': return '\\v';
+    case '\0': return '\\0';
+    case '\'': return '\\\'';
+    default:
+      const code = ch.charCodeAt(0);
+      if (code >= 0x20 && code <= 0x7E)  {
+        return ch
+      } else if (code < 0x7F) {
+        return `\\x${code.toString(16).padStart(2, '0')}`
+      } else {
+        return `\\u${code.toString(16).padStart(4, '0')}`
+      }
+  }
+}
+
+/**
+ * Thrown by scanners on an unexpected character. `char` is the offending
+ * character, or `EOF` when the input ended prematurely.
+ */
+export class ScanError extends Error {
+  constructor(public file: TextFile, public position: TextPos, public char: string) {
+    super(`${file.origPath}:${position.line}:${position.column}: unexpected ${char === EOF ? 'end-of-file' : `char '${escapeChar(char)}'`}`)
+  }
+}
+