bolt/src/parser.ts

686 lines
17 KiB
TypeScript
Raw Normal View History

2020-02-25 12:26:21 +01:00
2020-02-25 17:55:17 +01:00
import * as acorn from "acorn"
2020-02-25 12:26:21 +01:00
import {
Syntax,
Token,
FuncDecl,
Identifier,
SyntaxKind,
TokenStream,
RetStmt,
VarDecl,
Stmt,
Patt,
Expr,
BindPatt,
Param,
RefExpr,
TypeRef,
TypeDecl,
ConstExpr,
2020-02-25 17:55:17 +01:00
QualName,
CallExpr,
ImportDecl,
SourceElement,
Module,
RecordDecl,
2020-03-03 14:53:54 +01:00
NewTypeDecl,
2020-02-25 12:26:21 +01:00
} from "./ast"
import { stringType, intType } from "./checker"
import { PrimValue } from "./evaluator"
2020-02-25 12:26:21 +01:00
function describeKind(kind: SyntaxKind): string {
switch (kind) {
case SyntaxKind.Identifier:
return "an identifier"
case SyntaxKind.Operator:
return "an operator"
2020-02-25 17:55:17 +01:00
case SyntaxKind.StringLiteral:
return "a string"
case SyntaxKind.IntegerLiteral:
return "an integer"
case SyntaxKind.FnKeyword:
2020-02-25 17:55:17 +01:00
return "'fn'"
case SyntaxKind.ForeignKeyword:
return "'foreign'"
case SyntaxKind.PubKeyword:
return "'pub'"
2020-02-25 17:55:17 +01:00
case SyntaxKind.LetKeyword:
return "'let'"
2020-02-25 12:26:21 +01:00
case SyntaxKind.Semi:
return "';'"
case SyntaxKind.Colon:
return "':'"
case SyntaxKind.Dot:
return "'.'"
2020-03-03 14:53:54 +01:00
case SyntaxKind.RArrow:
return "'->'"
2020-02-25 12:26:21 +01:00
case SyntaxKind.Comma:
return "','"
case SyntaxKind.ModKeyword:
return "'mod'"
case SyntaxKind.StructKeyword:
return "'struct'"
case SyntaxKind.EnumKeyword:
return "'enum'"
2020-02-25 12:26:21 +01:00
case SyntaxKind.Braced:
return "'{' .. '}'"
case SyntaxKind.Bracketed:
return "'[' .. ']'"
case SyntaxKind.Parenthesized:
return "'(' .. ')'"
2020-02-25 17:55:17 +01:00
case SyntaxKind.EOS:
return "'}', ')', ']' or end-of-file"
2020-02-25 12:26:21 +01:00
default:
2020-02-25 17:55:17 +01:00
throw new Error(`failed to describe ${SyntaxKind[kind]}`)
2020-02-25 12:26:21 +01:00
}
}
function enumerate(elements: string[]) {
if (elements.length === 1) {
return elements[0]
} else {
2020-02-25 17:55:17 +01:00
return elements.slice(0, elements.length-1).join(',') + ' or ' + elements[elements.length-1]
2020-02-25 12:26:21 +01:00
}
}
export class ParseError extends Error {
constructor(public actual: Token, public expected: SyntaxKind[]) {
2020-02-25 17:59:36 +01:00
super(`${actual.span.file.path}:${actual.span.start.line}:${actual.span.start.column}: expected ${enumerate(expected.map(e => describeKind(e)))} but got ${describeKind(actual.kind)}`)
2020-02-25 12:26:21 +01:00
}
}
2020-03-03 14:53:54 +01:00
enum OperatorKind {
Prefix,
InfixL,
InfixR,
Suffix,
}
interface OperatorInfo {
kind: OperatorKind;
arity: number;
name: string;
precedence: number;
}
2020-02-25 12:26:21 +01:00
export class Parser {
2020-03-03 14:53:54 +01:00
operatorTable = [
[
[OperatorKind.InfixL, 2, '&&'],
[OperatorKind.InfixL, 2, '||']
],
[
[OperatorKind.InfixL, 2, '<'],
[OperatorKind.InfixL, 2, '>'],
[OperatorKind.InfixL, 2, '<='],
[OperatorKind.InfixL, 2, '>=']
],
[
[OperatorKind.InfixL, 2, '>>'],
[OperatorKind.InfixL, 2, '<<']
],
[
[OperatorKind.InfixL, 2, '+'],
[OperatorKind.InfixL, 2, '-'],
],
[
[OperatorKind.InfixL, 2, '/'],
[OperatorKind.InfixL, 2, '*'],
[OperatorKind.InfixL, 2, '%'],
],
[
[OperatorKind.Prefix, '!']
],
];
parseQualName(tokens: TokenStream): QualName {
2020-02-25 12:26:21 +01:00
const path: Identifier[] = [];
while (true) {
const t0 = tokens.peek(2);
if (t0.kind !== SyntaxKind.Dot) {
break;
}
path.push(tokens.get() as Identifier)
tokens.get();
}
const name = tokens.get();
if (name.kind !== SyntaxKind.Identifier) {
throw new ParseError(name, [SyntaxKind.Identifier]);
}
const startNode = path.length > 0 ? path[0] : name;
const endNode = name;
return new QualName(name, path, null, [startNode, endNode]);
}
parsePattern(tokens: TokenStream): Patt {
const t0 = tokens.peek(1);
if (t0.kind === SyntaxKind.Identifier) {
tokens.get();
return new BindPatt(t0, null, t0)
} else {
throw new ParseError(t0, [SyntaxKind.Identifier])
}
}
parseImportDecl(tokens: TokenStream): ImportDecl {
// Assuming first keyword is 'import'
tokens.get();
const t0 = tokens.get();
if (t0.kind !== SyntaxKind.StringLiteral) {
throw new ParseError(t0, [SyntaxKind.StringLiteral])
}
return new ImportDecl(t0.value, null, t0);
}
2020-02-25 12:26:21 +01:00
parseTypeDecl(tokens: TokenStream): TypeDecl {
const t0 = tokens.peek();
if (t0.kind === SyntaxKind.Identifier) {
const name = this.parseQualName(tokens)
return new TypeRef(name, [], null, name.origNode)
} else {
throw new ParseError(t0, [SyntaxKind.Identifier]);
}
}
2020-02-25 17:55:17 +01:00
parsePrimExpr(tokens: TokenStream): Expr {
2020-02-25 12:26:21 +01:00
const t0 = tokens.peek();
2020-02-25 17:55:17 +01:00
if (t0.kind === SyntaxKind.StringLiteral) {
tokens.get();
return new ConstExpr(new PrimValue(stringType, t0.value), null, t0);
} else if (t0.kind === SyntaxKind.IntegerLiteral) {
tokens.get();
return new ConstExpr(new PrimValue(intType, t0.value), null, t0);
2020-02-25 12:26:21 +01:00
} else if (t0.kind === SyntaxKind.Identifier) {
const name = this.parseQualName(tokens);
return new RefExpr(name, null, name.origNode);
} else {
2020-02-25 17:55:17 +01:00
throw new ParseError(t0, [SyntaxKind.StringLiteral, SyntaxKind.Identifier]);
2020-02-25 12:26:21 +01:00
}
}
parseSyntax(tokens: TokenStream): Syntax {
// Assuming first token is 'syntax'
tokens.get();
2020-03-03 14:53:54 +01:00
const t1 = tokens.get();
if (t1.kind !== SyntaxKind.Braced) {
throw new ParseError(t1, [SyntaxKind.Braced])
}
const innerTokens = t1.toTokenStream();
const pattern = this.parsePattern(innerTokens)
const t2 = innerTokens.get();
if (t2.kind !== SyntaxKind.RArrow) {
throw new ParseError(t2, [SyntaxKind.RArrow]);
}
const body = this.parseBody(innerTokens);
return new Macro(pattern, body)
}
parseExpr(tokens: TokenStream): Expr {
2020-02-25 17:55:17 +01:00
return this.parsePrimExpr(tokens)
}
parseParam(tokens: TokenStream): Param {
2020-02-25 12:26:21 +01:00
let defaultValue = null;
let typeDecl = null;
const pattern = this.parsePattern(tokens)
const t0 = tokens.peek(1);
if (t0.kind === SyntaxKind.Colon) {
tokens.get();
typeDecl = this.parseTypeDecl(tokens);
const t1 = tokens.peek(1);
if (t1.kind === SyntaxKind.EqSign) {
tokens.get();
defaultValue = this.parseExpr(tokens);
}
}
if (t0.kind === SyntaxKind.EqSign) {
2020-02-25 12:26:21 +01:00
tokens.get();
defaultValue = this.parseExpr(tokens);
}
return new Param(pattern, typeDecl, defaultValue)
}
parseVarDecl(tokens: TokenStream): VarDecl {
let isMutable = false;
let typeDecl = null;
let value = null;
2020-02-25 17:55:17 +01:00
// Assuming first token is 'let'
tokens.get();
const t0 = tokens.peek();
if (t0.kind === SyntaxKind.Identifier && t0.text === 'mut') {
tokens.get();
isMutable = true;
}
const bindings = this.parsePattern(tokens)
const t1 = tokens.peek();
if (t1.kind === SyntaxKind.Colon) {
tokens.get();
typeDecl = this.parseTypeDecl(tokens);
}
const t2 = tokens.peek();
if (t2.kind === SyntaxKind.EqSign) {
tokens.get();
value = this.parseExpr(tokens);
}
return new VarDecl(isMutable, bindings, typeDecl, value, null)
2020-02-25 12:26:21 +01:00
}
parseRetStmt(tokens: TokenStream): RetStmt {
// Assuming first token is 'return'
const t0 = tokens.get();
let expr = null;
const t1 = tokens.peek();
if (t1.kind !== SyntaxKind.EOS) {
expr = this.parseExpr(tokens)
}
return new RetStmt(expr, null, [t0, expr.getEndNode()]);
}
parseStmt(tokens: TokenStream): Stmt {
2020-03-03 14:53:54 +01:00
this.parseCallExpr(tokens)
}
parseRecordDecl(tokens: TokenStream): RecordDecl {
let isPublic = false;
let kw = tokens.get();
if (kw.kind !== SyntaxKind.Identifier) {
throw new ParseError(kw, [SyntaxKind.PubKeyword, SyntaxKind.StructKeyword]);
}
if (kw.text === 'pub') {
isPublic = true;
kw = tokens.get();
}
if (kw.kind !== SyntaxKind.Identifier || kw.text !== 'struct') {
throw new ParseError(kw, [SyntaxKind.StructKeyword])
}
const name = this.parseQualName(tokens);
const t2 = tokens.get();
if (t2.kind !== SyntaxKind.Braced) {
throw new ParseError(kw, [SyntaxKind.Braced])
}
let fields = [];
return new RecordDecl(isPublic, name, fields);
}
2020-02-25 12:26:21 +01:00
parseStmts(tokens: TokenStream, origNode: Syntax | null): Stmt[] {
// TODO
return []
}
parseModDecl(tokens: TokenStream): Module {
let isPublic = false;
let kw = tokens.get();
if (kw.kind !== SyntaxKind.Identifier) {
throw new ParseError(kw, [SyntaxKind.PubKeyword, SyntaxKind.ModKeyword]);
}
if (kw.text === 'pub') {
isPublic = true;
kw = tokens.get();
}
if (kw.kind !== SyntaxKind.Identifier || kw.text !== 'mod') {
throw new ParseError(kw, [SyntaxKind.ModKeyword])
}
const name = this.parseQualName(tokens);
const t1 = tokens.get();
if (t1.kind !== SyntaxKind.Braced) {
throw new ParseError(t1, [SyntaxKind.Braced])
}
return new Module(isPublic, name, t1.toSentences());
}
2020-02-25 12:26:21 +01:00
protected assertEmpty(tokens: TokenStream) {
const t0 = tokens.peek(1);
if (t0.kind !== SyntaxKind.EOS) {
throw new ParseError(t0, [SyntaxKind.EOS]);
}
}
2020-03-03 14:53:54 +01:00
parseNewType(tokens: TokenSteam): NewTypeDecl {
let isPublic = false;
let t0 = tokens.get();
if (t0.kind !== SyntaxKind.Identifier) {
throw new ParseError(t0, [SyntaxKind.PubKeyword, SyntaxKind.NewTypeKeyword])
}
if (t0.text === 'pub') {
isPublic = true;
t0 = tokens.get();
if (t0.kind !== SyntaxKind.Identifier) {
throw new ParseError(t0, [SyntaxKind.NewTypeKeyword])
}
}
if (t0.text !== 'newtype') {
throw new ParseError(t0, [SyntaxKind.NewTypeKeyword])
}
const name = tokens.get();
if (name.kind !== SyntaxKind.Identifier) {
throw new ParseError(name, [SyntaxKind.Identifier])
}
return new NewTypeDecl(isPublic, name)
}
parseFuncDecl(tokens: TokenStream, origNode: Syntax | null): FuncDecl {
2020-02-25 12:26:21 +01:00
2020-02-25 17:55:17 +01:00
let target = "Bolt";
let isPublic = false;
2020-02-25 17:55:17 +01:00
const k0 = tokens.peek();
2020-02-25 17:55:17 +01:00
if (k0.kind !== SyntaxKind.Identifier) {
throw new ParseError(k0, [SyntaxKind.PubKeyword, SyntaxKind.ForeignKeyword, SyntaxKind.FnKeyword])
}
if (k0.text === 'pub') {
tokens.get();
isPublic = true;
2020-02-25 17:55:17 +01:00
}
const k1 = tokens.peek();
if (k1.kind !== SyntaxKind.Identifier) {
throw new ParseError(k1, [SyntaxKind.ForeignKeyword, SyntaxKind.FnKeyword])
}
if (k1.text === 'foreign') {
tokens.get();
2020-02-25 17:55:17 +01:00
const l1 = tokens.get();
if (l1.kind !== SyntaxKind.StringLiteral) {
throw new ParseError(l1, [SyntaxKind.StringLiteral])
}
target = l1.value;
}
const k2 = tokens.get();
if (k2.kind !== SyntaxKind.Identifier || k2.text !== 'fn') {
throw new ParseError(k2, [SyntaxKind.FnKeyword])
2020-02-25 17:55:17 +01:00
}
2020-02-25 12:26:21 +01:00
let name: QualName;
let returnType = null;
let body = null;
let params: Param[] = [];
2020-02-25 17:55:17 +01:00
// Parse parameters
2020-02-25 12:26:21 +01:00
const t0 = tokens.peek(1);
const t1 = tokens.peek(2);
const isParamLike = (token: Token) =>
token.kind === SyntaxKind.Identifier || token.kind === SyntaxKind.Parenthesized;
const parseParamLike = (tokens: TokenStream) => {
const t0 = tokens.peek(1);
if (t0.kind === SyntaxKind.Identifier) {
tokens.get();
return new Param(new BindPatt(t0, null, t0), null, null, null, t0)
} else if (t0.kind === SyntaxKind.Parenthesized) {
tokens.get();
2020-02-25 17:55:17 +01:00
const innerTokens = t0.toTokenStream();
2020-02-25 12:26:21 +01:00
const param = this.parseParam(innerTokens)
this.assertEmpty(innerTokens);
return param
} else {
throw new ParseError(t0, [SyntaxKind.Identifier, SyntaxKind.Parenthesized])
}
}
if (t0.kind === SyntaxKind.Operator) {
name = new QualName(t0, [], null, t0);
tokens.get();
params.push(parseParamLike(tokens))
} else if (isParamLike(t0) && t1.kind == SyntaxKind.Operator) {
params.push(parseParamLike(tokens));
name = new QualName(t1, [], null, t1);
while (true) {
const t2 = tokens.peek();
if (t2.kind !== SyntaxKind.Operator) {
break;
}
if (t2.text !== t1.text) {
throw new Error(`Operators have to match when defining or declaring an n-ary operator.`);
}
tokens.get();
params.push(parseParamLike(tokens))
}
} else if (t0.kind === SyntaxKind.Identifier) {
name = this.parseQualName(tokens)
const t2 = tokens.get();
if (t2.kind === SyntaxKind.Parenthesized) {
2020-02-25 17:55:17 +01:00
const innerTokens = t2.toTokenStream();
2020-02-25 12:26:21 +01:00
while (true) {
const t3 = innerTokens.peek();
if (t3.kind === SyntaxKind.EOS) {
break;
}
params.push(this.parseParam(innerTokens))
const t4 = innerTokens.get();
if (t4.kind === SyntaxKind.Comma) {
continue;
} else if (t4.kind === SyntaxKind.EOS) {
break;
} else {
throw new ParseError(t4, [SyntaxKind.Comma, SyntaxKind.EOS])
}
}
}
} else {
throw new ParseError(t0, [SyntaxKind.Identifier, SyntaxKind.Operator, SyntaxKind.Parenthesized])
}
// Parse return type
const t2 = tokens.peek();
if (t2.kind === SyntaxKind.RArrow) {
tokens.get();
2020-02-25 17:55:17 +01:00
returnType = this.parseTypeDecl(tokens);
2020-02-25 12:26:21 +01:00
}
// Parse function body
const t3 = tokens.peek();
if (t3.kind === SyntaxKind.Braced) {
2020-02-25 17:55:17 +01:00
tokens.get();
switch (target) {
case "Bolt":
body = this.parseStmts(tokens, t3);
break;
case "JS":
body = acorn.parse(t3.text).body;
break;
default:
throw new Error(`Unrecognised language: ${target}`);
}
2020-02-25 12:26:21 +01:00
}
return new FuncDecl(isPublic, target, name, params, returnType, body, null, origNode)
}
2020-02-25 12:26:21 +01:00
parseSourceElement(tokens: TokenStream): SourceElement {
const t0 = tokens.peek(1);
if (t0.kind === SyntaxKind.Identifier) {
let i = 1;
let kw: Token = t0;
if (t0.text === 'pub') {
i++;
kw = tokens.peek(i);
if (kw.kind !== SyntaxKind.Identifier) {
throw new ParseError(kw, [SyntaxKind.ForeignKeyword, SyntaxKind.ModKeyword, SyntaxKind.LetKeyword, SyntaxKind.FnKeyword, SyntaxKind.EnumKeyword, SyntaxKind.StructKeyword])
}
}
if (t0.text === 'foreign') {
i += 2;
kw = tokens.peek(i);
if (kw.kind !== SyntaxKind.Identifier) {
throw new ParseError(kw, [SyntaxKind.ModKeyword, SyntaxKind.LetKeyword, SyntaxKind.FnKeyword, SyntaxKind.EnumKeyword, SyntaxKind.StructKeyword])
}
}
switch (kw.text) {
2020-03-03 14:53:54 +01:00
case 'newtype':
return this.parseNewType(tokens);
case 'syntax':
return this.parseSyntax(tokens);
case 'mod':
return this.parseModDecl(tokens);
case 'fn':
return this.parseFuncDecl(tokens, null);
case 'let':
return this.parseVarDecl(tokens);
case 'struct':
return this.parseRecordDecl(tokens);
case 'enum':
return this.parseVariantDecl(tokens);
default:
2020-03-03 14:53:54 +01:00
try {
return this.parseExpr(tokens)
} catch (e) {
if (e instanceof ParseError) {
throw new ParseError(kw, [...e.expected, SyntaxKind.ModKeyword, SyntaxKind.LetKeyword, SyntaxKind.FnKeyword, SyntaxKind.EnumKeyword, SyntaxKind.StructKeyword])
} else {
throw e;
}
}
}
} else {
return this.parseStmt(tokens)
}
2020-02-25 12:26:21 +01:00
}
2020-03-03 14:53:54 +01:00
getOperatorDesc(seekArity: number, seekName: string): OperatorInfo {
for (let i = 0; i < this.operatorTable.length; ++i) {
for (const [kind, arity, name] of this.operatorTable[i]) {
if (artity == seekArity && name === seekName) {
return {
kind,
name,
arity,
precedence: i
}
}
}
}
}
parseBinOp(tokens: TokenStream, lhs: Expr , minPrecedence: number) {
let lookahead = tokens.peek(1);
while (true) {
if (lookahead.kind !== SyntaxKind.Operator) {
break;
}
const lookaheadDesc = this.getOperatorDesc(2, lookahead.text);
if (lookaheadDesc === null || lookaheadDesc.precedence < minPrecedence) {
break;
}
const op = lookahead;
const opDesc = this.getOperatorDesc(2, op.text);
tokens.get();
let rhs = this.parsePrimExpr(tokens)
lookahead = tokens.peek()
while (lookaheadDesc.arity === 2
&& ((lookaheadDesc.precedence > opDesc.precedence)
|| lookaheadDesc.kind === OperatorKind.InfixR && lookaheadDesc.precedence === opDesc.precedence)) {
rhs = this.parseBinOp(tokens, rhs, lookaheadDesc.precedence)
}
lookahead = tokens.peek();
lhs = new CallExpr(new RefExpr(new QualName(op, [])), [lhs, rhs]);
}
return lhs
}
parseCallExpr(tokens: TokenStream): CallExpr {
2020-02-25 17:55:17 +01:00
const operator = this.parsePrimExpr(tokens)
const args: Expr[] = []
const t2 = tokens.get();
if (t2.kind !== SyntaxKind.Parenthesized) {
throw new ParseError(t2, [SyntaxKind.Parenthesized])
2020-02-25 12:26:21 +01:00
}
2020-02-25 17:55:17 +01:00
const innerTokens = t2.toTokenStream();
while (true) {
const t3 = innerTokens.peek();
if (t3.kind === SyntaxKind.EOS) {
break;
}
args.push(this.parseExpr(innerTokens))
const t4 = innerTokens.get();
if (t4.kind === SyntaxKind.EOS) {
break
} else if (t4.kind !== SyntaxKind.Comma){
throw new ParseError(t4, [SyntaxKind.Comma])
}
}
return new CallExpr(operator, args, null)
2020-02-25 12:26:21 +01:00
}
}