2020-02-24 18:30:39 +01:00
|
|
|
|
|
|
|
import XRegExp from "xregexp"
|
|
|
|
|
|
|
|
import {
|
|
|
|
TextFile,
|
|
|
|
TextPos,
|
|
|
|
TextSpan,
|
2020-05-10 15:56:34 +02:00
|
|
|
} from "./text"
|
|
|
|
|
|
|
|
import {
|
2020-05-10 23:50:42 +02:00
|
|
|
setParents,
|
2020-05-10 15:56:34 +02:00
|
|
|
SyntaxKind,
|
|
|
|
BoltToken,
|
|
|
|
BoltSentence,
|
2020-05-22 19:50:47 +02:00
|
|
|
createEndOfFile,
|
2020-05-10 15:56:34 +02:00
|
|
|
createBoltSentence,
|
|
|
|
createBoltIdentifier,
|
|
|
|
createBoltRArrow,
|
|
|
|
createBoltOperator,
|
|
|
|
createBoltParenthesized,
|
|
|
|
createBoltBraced,
|
|
|
|
createBoltBracketed,
|
|
|
|
createBoltSourceFile,
|
|
|
|
createBoltSemi,
|
|
|
|
createBoltComma,
|
|
|
|
createBoltStringLiteral,
|
|
|
|
createBoltIntegerLiteral,
|
|
|
|
createBoltColon,
|
|
|
|
createBoltDot,
|
|
|
|
createBoltEqSign,
|
2020-05-10 23:50:42 +02:00
|
|
|
createBoltPubKeyword,
|
|
|
|
createBoltMutKeyword,
|
|
|
|
createBoltStructKeyword,
|
|
|
|
createBoltEnumKeyword,
|
|
|
|
createBoltForeignKeyword,
|
|
|
|
createBoltAssignment,
|
|
|
|
createBoltYieldKeyword,
|
|
|
|
createBoltReturnKeyword,
|
|
|
|
createBoltFnKeyword,
|
|
|
|
createBoltLArrow,
|
|
|
|
createBoltDotDot,
|
2020-05-22 19:50:47 +02:00
|
|
|
createJSIdentifier,
|
|
|
|
JSToken,
|
|
|
|
createBoltLtSign,
|
|
|
|
createBoltGtSign,
|
|
|
|
createBoltModKeyword,
|
|
|
|
createBoltTypeKeyword,
|
2020-02-24 18:30:39 +01:00
|
|
|
} from "./ast"
|
|
|
|
|
2020-05-10 15:56:34 +02:00
|
|
|
/**
 * The three families of paired punctuation the scanner groups on.
 *
 * An opening and a closing delimiter of the same family map to the same
 * member (see `getPunctType`).
 */
export enum PunctType {
  /** `(` and `)` */
  Paren = 0,
  /** `[` and `]` */
  Bracket = 1,
  /** `{` and `}` */
  Brace = 2,
}
|
|
|
|
|
2020-02-24 18:30:39 +01:00
|
|
|
/**
 * Renders a single character in a printable form for use in diagnostics.
 *
 * Well-known control characters become their backslash mnemonic, printable
 * ASCII is returned unchanged, remaining control characters become `\xHH`
 * and everything else becomes `\uHHHH`.
 *
 * @param ch The character to render. Only the first UTF-16 code unit is
 *           inspected. An empty string yields an empty string.
 * @returns The escaped textual representation of `ch`.
 */
function escapeChar(ch: string) {
  // The empty string has no code unit to inspect; without this guard the
  // fallback below would format NaN and produce "\u0NaN".
  if (ch.length === 0) {
    return '';
  }
  switch (ch) {
    // '\a' is not an escape sequence in JavaScript (it is just the letter
    // "a"), so the BEL character has to be spelled numerically.
    case '\x07': return '\\a';
    case '\b': return '\\b';
    case '\f': return '\\f';
    case '\n': return '\\n';
    case '\r': return '\\r';
    case '\t': return '\\t';
    case '\v': return '\\v';
    case '\0': return '\\0';
    case '\'': return '\\\'';
    default: {
      const code = ch.charCodeAt(0);
      if (code >= 0x20 && code <= 0x7E) {
        // Printable ASCII needs no escaping.
        return ch;
      }
      if (code < 0x20) {
        // Remaining C0 control characters.
        return `\\x${code.toString(16).padStart(2, '0')}`;
      }
      return `\\u${code.toString(16).padStart(4, '0')}`;
    }
  }
}
|
|
|
|
|
|
|
|
/**
 * Classifies a delimiter character by the family it belongs to.
 *
 * @param ch A single character.
 * @returns The matching `PunctType` for `(`/`)`, `[`/`]` and `{`/`}`, or
 *          `null` when `ch` is not one of those six delimiters.
 */
function getPunctType(ch: string) {
  if (ch === '(' || ch === ')') {
    return PunctType.Paren;
  }
  if (ch === '[' || ch === ']') {
    return PunctType.Bracket;
  }
  if (ch === '{' || ch === '}') {
    return PunctType.Brace;
  }
  return null;
}
|
|
|
|
|
|
|
|
/**
 * Tells whether `ch` is one of the closing delimiters `}`, `]` or `)`.
 */
function isClosePunct(ch: string) {
  return ch === '}' || ch === ']' || ch === ')';
}
|
|
|
|
|
|
|
|
/**
 * Tells whether `ch` is one of the opening delimiters `{`, `(` or `[`.
 */
function isOpenPunct(ch: string) {
  return ch === '{' || ch === '(' || ch === '[';
}
|
|
|
|
|
|
|
|
/**
 * Raised when a scanner meets a character it cannot turn into a token.
 *
 * The message has the form `path:line:column: unexpected char 'x'`. The
 * scanners in this file signal end of input by passing the empty string as
 * `char`; that case is reported as `unexpected end of input`, because an
 * empty string has no character to print.
 */
class ScanError extends Error {
  /**
   * @param file     The file being scanned; its `origPath` prefixes the message.
   * @param position Where the offending character was found.
   * @param char     The offending character, or `''` for end of input.
   */
  constructor(public file: TextFile, public position: TextPos, public char: string) {
    super(
      char === ''
        ? `${file.origPath}:${position.line}:${position.column}: unexpected end of input`
        : `${file.origPath}:${position.line}:${position.column}: unexpected char '${escapeChar(char)}'`
    )
  }
}
|
|
|
|
|
2020-02-26 18:53:28 +01:00
|
|
|
/**
 * Tells whether `ch` contains a character of Unicode category Nd (decimal digit).
 *
 * NOTE(review): Nd also covers non-ASCII digits, while `Scanner.scanToken`
 * hands the collected digits to `BigInt()` — confirm that non-ASCII digits
 * are meant to be accepted here.
 */
function isDigit(ch: string) {
  const decimalDigit = XRegExp('\\p{Nd}')
  return decimalDigit.test(ch)
}
|
|
|
|
|
2020-02-24 18:30:39 +01:00
|
|
|
/**
 * Tells whether `ch` is skipped as whitespace: either a line feed or a
 * character of Unicode category Zs (space separator).
 *
 * NOTE(review): tab and carriage return are not in Zs, so they are not
 * treated as whitespace here — confirm that is intended.
 */
function isWhiteSpace(ch: string) {
  if (ch == '\n') {
    return true
  }
  const spaceSeparator = XRegExp('\\p{Zs}')
  return spaceSeparator.test(ch)
}
|
|
|
|
|
|
|
|
/**
 * Tells whether `ch` is the line feed character, the only character the
 * scanners count as starting a new line.
 */
function isNewLine(ch: string) {
  const lineFeed = '\n'
  return lineFeed == ch
}
|
|
|
|
|
|
|
|
/**
 * Tells whether `ch` may begin a Bolt identifier: an underscore or any
 * character of Unicode category L (letter).
 */
function isIdentStart(ch: string) {
  if (ch == '_') {
    return true
  }
  const letter = XRegExp('\\p{L}')
  return letter.test(ch)
}
|
|
|
|
|
|
|
|
/**
 * Tells whether `ch` may continue a Bolt identifier: an underscore or any
 * character of Unicode category L (letter).
 *
 * NOTE(review): this is the same test as `isIdentStart`, so digits are not
 * accepted inside identifiers — confirm that is intended.
 */
function isIdentPart(ch: string) {
  if (ch == '_') {
    return true
  }
  const letter = XRegExp('\\p{L}')
  return letter.test(ch)
}
|
|
|
|
|
2020-05-10 23:50:42 +02:00
|
|
|
/**
 * Tells whether `ch` contains one of the characters operators are built
 * from: `= + / - * % $ ! > < & ^ |`.
 */
function isSymbol(ch: string) {
  const symbolChars = /[=+\/\-*%$!><&^|]/
  return symbolChars.test(ch)
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Tells whether `ch` is JavaScript whitespace: TAB, VT, FF, SP, NBSP,
 * ZWNBSP (U+FEFF) or any character of Unicode category Zs.
 *
 * Line terminators such as `\n` are not matched by this function.
 */
function isJSWhiteSpace(ch: string): boolean {
  switch (ch) {
    case '\u0009': // character tabulation
    case '\u000B': // line tabulation (the original compared against this twice)
    case '\u000C': // form feed
    case '\u0020': // space
    case '\u00A0': // no-break space
    case '\uFEFF': // zero width no-break space / byte order mark
      return true;
    default:
      return XRegExp('\\p{Zs}').test(ch)
  }
}
|
|
|
|
|
|
|
|
// Characters that may begin a JavaScript identifier: Unicode ID_Start, `$`,
// `_`, and a backslash (which introduces a `\u` escape). The previous pattern
// ended in `\]`, which escaped the closing bracket and left the character
// class unterminated. Built once instead of on every call.
const JS_IDENT_START_PATTERN = new RegExp('[\\p{ID_Start}$_\\\\]', 'u');

/**
 * Tells whether `ch` may begin a JavaScript identifier.
 *
 * @param ch A single character; the empty string yields `false`.
 */
function isJSIdentStart(ch: string): boolean {
  return JS_IDENT_START_PATTERN.test(ch)
}
|
|
|
|
|
|
|
|
// Characters that may continue a JavaScript identifier: Unicode ID_Continue,
// `$`, ZWNJ (U+200C), ZWJ (U+200D), and a backslash (which introduces a `\u`
// escape). The previous pattern ended in `\]`, which escaped the closing
// bracket and left the character class unterminated. Built once instead of
// on every call.
const JS_IDENT_PART_PATTERN = new RegExp('[\\u200C\\u200D\\p{ID_Continue}$\\\\]', 'u');

/**
 * Tells whether `ch` may appear after the first character of a JavaScript
 * identifier.
 *
 * @param ch A single character; the empty string yields `false`.
 */
function isJSIdentPart(ch: string): boolean {
  return JS_IDENT_PART_PATTERN.test(ch)
}
|
|
|
|
|
2020-05-10 23:50:42 +02:00
|
|
|
//function isOperatorPart(ch: string) {
|
|
|
|
//return /[=+\-*\/%$!><]/.test(ch)
|
|
|
|
//}
|
2020-02-24 18:30:39 +01:00
|
|
|
|
|
|
|
// Sentinel "character" that the scanners' readChar() returns once the whole
// input has been consumed.
const EOF = ''
|
|
|
|
|
|
|
|
/**
 * Hand-written tokenizer for Bolt source text.
 *
 * Characters are pulled from `input` one at a time through a small lookahead
 * buffer. `scanToken()` produces one token per call, `peek()`/`get()` add
 * token-level lookahead on top of it, and `scan()` groups the whole input
 * into sentences wrapped in a source file node.
 */
export class Scanner {

  /** Characters already read from `input` but not yet consumed. */
  protected buffer: string[] = [];

  /** Tokens produced by `peek()` that `get()` has not handed out yet. */
  protected scanned: BoltToken[] = [];

  /** Position of the next character `getChar()` will consume; updated in place. */
  protected currPos: TextPos;

  /** Index into `input` of the next character to read. */
  protected offset = 0;

  /**
   * @param file     The file the text belongs to; attached to every span.
   * @param input    The text to tokenize.
   * @param startPos Position of the first character of `input`. The object is
   *                 stored as-is and mutated while scanning.
   */
  constructor(public file: TextFile, public input: string, startPos = new TextPos(0,1,1)) {
    this.currPos = startPos;
  }

  /** Reads the next raw character from `input`, or `EOF` when it is exhausted. */
  protected readChar() {
    if (this.offset == this.input.length) {
      return EOF
    }
    return this.input[this.offset++]
  }

  /**
   * Returns the `count`-th upcoming character (1-based) without consuming it.
   * Past the end of the input this yields `EOF`.
   */
  protected peekChar(count = 1) {
    while (this.buffer.length < count) {
      this.buffer.push(this.readChar());
    }
    return this.buffer[count - 1];
  }

  /**
   * Consumes and returns the next character, advancing `currPos`.
   * `EOF` is returned without moving the position.
   */
  protected getChar() {
    const ch = this.buffer.length > 0
      ? this.buffer.shift()!
      : this.readChar()

    if (ch == EOF) {
      return EOF
    }

    if (isNewLine(ch)) {
      this.currPos.line += 1;
      this.currPos.column = 1;
    } else {
      this.currPos.column += 1;
    }
    this.currPos.offset += 1;

    return ch
  }

  /**
   * Consumes one character unconditionally, then keeps consuming for as long
   * as `pred` accepts the next character. Returns everything consumed.
   */
  protected takeWhile(pred: (ch: string) => boolean) {
    let text = this.getChar();
    while (true) {
      const c0 = this.peekChar();
      if (!pred(c0)) {
        break;
      }
      this.getChar()
      text += c0;
    }
    return text;
  }

  /**
   * Consumes exactly `digitCount` hexadecimal digits and returns their value.
   *
   * @throws ScanError if a non-hexadecimal character or the end of input is met.
   */
  protected scanHexValue(digitCount: number): number {
    let value = 0;
    for (let i = 0; i < digitCount; i++) {
      const digitPos = this.currPos.clone();
      const ch = this.getChar();
      if (!/^[0-9a-fA-F]$/.test(ch)) {
        throw new ScanError(this.file, digitPos, ch);
      }
      value = value * 16 + parseInt(ch, 16);
    }
    return value;
  }

  /**
   * Scans the remainder of an escape sequence inside a string literal; the
   * leading backslash must already have been consumed.
   *
   * Recognised escapes mirror what `escapeChar` produces: `\a \b \f \n \r \t
   * \v \0`, `\xHH` and `\uHHHH`, plus `\\`, `\"` and `\'` for the literal
   * character.
   *
   * @returns The character the escape sequence stands for.
   * @throws ScanError on an unknown escape or when the input ends early.
   */
  protected scanEscapeSequence(): string {
    const escapePos = this.currPos.clone();
    const ch = this.getChar();
    switch (ch) {
      case 'a': return '\x07';
      case 'b': return '\b';
      case 'f': return '\f';
      case 'n': return '\n';
      case 'r': return '\r';
      case 't': return '\t';
      case 'v': return '\v';
      case '0': return '\0';
      case '\\':
      case '"':
      case '\'':
        return ch;
      case 'x': return String.fromCharCode(this.scanHexValue(2));
      case 'u': return String.fromCharCode(this.scanHexValue(4));
      default:
        // Also reached when `ch` is EOF.
        throw new ScanError(this.file, escapePos, ch);
    }
  }

  /**
   * Scans and returns the next token, skipping leading whitespace.
   *
   * At the end of the input an end-of-file token is returned; calling again
   * keeps returning end-of-file tokens.
   *
   * @throws ScanError on a character that cannot start a token, and on input
   *         that ends inside a string literal or a delimited group.
   */
  scanToken(): BoltToken {

    let c0 = this.peekChar();
    while (isWhiteSpace(c0)) {
      this.getChar();
      c0 = this.peekChar();
    }

    const startPos = this.currPos.clone()

    if (c0 == EOF) {
      return createEndOfFile(new TextSpan(this.file, startPos, startPos));
    }

    switch (c0) {
      case ';':
        this.getChar();
        return createBoltSemi(new TextSpan(this.file, startPos, this.currPos.clone()));
      case ',':
        this.getChar();
        return createBoltComma(new TextSpan(this.file, startPos, this.currPos.clone()));
      case ':':
        this.getChar();
        return createBoltColon(new TextSpan(this.file, startPos, this.currPos.clone()));
      case '.':
        // '.' is not matched by isSymbol(), so dots have to be recognised
        // here; otherwise they would fall through to the ScanError below.
        this.getChar();
        if (this.peekChar() === '.') {
          this.getChar();
          return createBoltDotDot(new TextSpan(this.file, startPos, this.currPos.clone()));
        }
        return createBoltDot(new TextSpan(this.file, startPos, this.currPos.clone()));
    }

    if (c0 === '"') {

      // String literal: collect characters up to the closing quote.
      this.getChar();

      let text = ''

      while (true) {
        const c1 = this.getChar();
        if (c1 === EOF) {
          throw new ScanError(this.file, this.currPos.clone(), EOF);
        }
        if (c1 === '"') {
          break;
        } else if (c1 === '\\') {
          text += this.scanEscapeSequence()
        } else {
          text += c1
        }
      }

      const endPos = this.currPos.clone();

      return createBoltStringLiteral(text, new TextSpan(this.file, startPos, endPos))

    } else if (isDigit(c0)) {

      const digits = this.takeWhile(isDigit)
      const endPos = this.currPos.clone();
      return createBoltIntegerLiteral(BigInt(digits), new TextSpan(this.file, startPos, endPos));

    } else if (isOpenPunct(c0)) {

      // Delimited group: the raw text between the opening delimiter and its
      // matching closing delimiter is captured unscanned. Only delimiters of
      // the same family as the opening one take part in nesting.
      this.getChar();

      const punctType = getPunctType(c0);
      let punctCount = 1;
      let text = ''

      while (true) {

        const c1 = this.getChar();

        if (c1 === EOF) {
          throw new ScanError(this.file, this.currPos.clone(), EOF)
        }

        if (punctType == getPunctType(c1)) {
          if (isClosePunct(c1)) {
            punctCount--;
            if (punctCount === 0)
              break;
          } else {
            punctCount++;
          }
        }

        text += c1

      }

      const endPos = this.currPos.clone();

      switch (punctType) {
        case PunctType.Brace:
          return createBoltBraced(text, new TextSpan(this.file, startPos, endPos));
        case PunctType.Paren:
          return createBoltParenthesized(text, new TextSpan(this.file, startPos, endPos));
        case PunctType.Bracket:
          return createBoltBracketed(text, new TextSpan(this.file, startPos, endPos));
        default:
          throw new Error("Got an invalid state.")
      }

    } else if (isIdentStart(c0)) {

      const name = this.takeWhile(isIdentPart);
      const endPos = this.currPos.clone();
      const span = new TextSpan(this.file, startPos, endPos);
      switch (name) {
        case 'pub': return createBoltPubKeyword(span);
        case 'mod': return createBoltModKeyword(span);
        case 'fn': return createBoltFnKeyword(span);
        case 'return': return createBoltReturnKeyword(span);
        case 'yield': return createBoltYieldKeyword(span);
        case 'type': return createBoltTypeKeyword(span);
        case 'foreign': return createBoltForeignKeyword(span);
        // NOTE(review): 'let' yields a *pub* keyword token. This looks like a
        // copy-paste slip, but no let-keyword factory is imported in this
        // file, so the behaviour is left unchanged — confirm and fix.
        case 'let': return createBoltPubKeyword(span);
        case 'mut': return createBoltMutKeyword(span);
        case 'struct': return createBoltStructKeyword(span);
        case 'enum': return createBoltEnumKeyword(span);
        default: return createBoltIdentifier(name, span);
      }

    } else if (isSymbol(c0)) {

      const text = this.takeWhile(isSymbol)
      const endPos = this.currPos.clone()
      const span = new TextSpan(this.file, startPos, endPos);

      switch (text) {
        case '->': return createBoltRArrow(span);
        case '<-': return createBoltLArrow(span);
        case '<': return createBoltLtSign(span);
        case '>': return createBoltGtSign(span);
        case '=': return createBoltEqSign(span);
        case '==': return createBoltOperator(text, span);
      }

      // Any other run of symbols ending in '=' is an assignment whose
      // operator is everything before the '='.
      // NOTE(review): this includes '<=', '>=' and '!=' — confirm those are
      // meant to be assignments rather than comparison operators.
      if (text.endsWith('=')) {
        const operator = text.substring(0, text.length-1);
        return createBoltAssignment(operator.length === 0 ? null : operator, span);
      }

      return createBoltOperator(text, span);

    } else {

      throw new ScanError(this.file, this.currPos.clone(), c0);

    }

  }

  /**
   * Returns the `count`-th upcoming token (1-based) without consuming it.
   */
  public peek(count = 1): BoltToken {
    while (this.scanned.length < count) {
      this.scanned.push(this.scanToken());
    }
    return this.scanned[count - 1];
  }

  /**
   * Consumes and returns the next token, taking previously peeked tokens first.
   */
  public get(): BoltToken {
    return this.scanned.length > 0
      ? this.scanned.shift()!
      : this.scanToken();
  }

  /**
   * Scans the rest of the input and groups the tokens into sentences.
   *
   * A sentence ends at a semicolon (which is dropped), after a braced group
   * (which is kept as the sentence's last token), or at the end of the input.
   * Empty sentences are not emitted.
   *
   * Tokens are read straight from `scanToken()`, so tokens previously
   * buffered by `peek()` are not included.
   */
  scanTokens() {

    const elements: BoltSentence[] = []
    let reachedEnd = false;

    while (!reachedEnd) {

      const tokens: BoltToken[] = [];

      while (true) {
        const token = this.scanToken();
        if (token.kind === SyntaxKind.EndOfFile) {
          reachedEnd = true;
          break;
        }
        if (token.kind === SyntaxKind.BoltSemi) {
          break;
        }
        tokens.push(token)
        if (token.kind === SyntaxKind.BoltBraced) {
          break;
        }
      }

      if (tokens.length > 0) {
        elements.push(
          createBoltSentence(
            tokens,
            new TextSpan(this.file, tokens[0].span!.start.clone(), tokens[tokens.length-1].span!.end.clone())
          )
        )
      }

    }

    return elements
  }

  /**
   * Scans the whole remaining input into a source file node spanning from the
   * current position to the end, with parent links set via `setParents`.
   */
  public scan() {
    const startPos = this.currPos.clone();
    const elements = this.scanTokens();
    const endPos = this.currPos.clone();
    const sourceFile = createBoltSourceFile(elements, new TextSpan(this.file, startPos, endPos));
    setParents(sourceFile);
    return sourceFile;
  }

}
|
2020-05-22 19:50:47 +02:00
|
|
|
|
|
|
|
/**
 * Tokenizer for JavaScript source text.
 *
 * Only identifiers are recognised so far. Whitespace and comments in front
 * of a token are skipped; anything else raises a `ScanError`.
 */
export class JSScanner {

  /** Characters already read from `input` but not yet consumed. */
  private buffer: string[] = [];

  /** Tokens produced by `peek()` that `get()` has not handed out yet. */
  private scanned: JSToken[] = [];

  /** Index into `input` of the next character to read. */
  private offset = 0;

  /**
   * @param file    The file the text belongs to; attached to every span.
   * @param input   The text to tokenize.
   * @param currPos Position of the first character of `input`. The object is
   *                mutated while scanning.
   */
  constructor(
    private file: TextFile,
    private input: string,
    private currPos: TextPos = new TextPos(0,1,1),
  ) {

  }

  /** Reads the next raw character from `input`, or `EOF` when it is exhausted. */
  protected readChar() {
    if (this.offset == this.input.length) {
      return EOF
    }
    return this.input[this.offset++]
  }

  /**
   * Returns the `count`-th upcoming character (1-based) without consuming it.
   * Past the end of the input this yields `EOF`.
   */
  protected peekChar(count = 1) {
    while (this.buffer.length < count) {
      this.buffer.push(this.readChar());
    }
    return this.buffer[count - 1];
  }

  /**
   * Consumes and returns the next character, advancing `currPos`.
   * `EOF` is returned without moving the position.
   */
  protected getChar() {
    const ch = this.buffer.length > 0
      ? this.buffer.shift()!
      : this.readChar()

    if (ch == EOF) {
      return EOF
    }

    if (isNewLine(ch)) {
      this.currPos.line += 1;
      this.currPos.column = 1;
    } else {
      this.currPos.column += 1;
    }
    this.currPos.offset += 1;

    return ch
  }

  /**
   * Consumes the next character and checks that it is `expected`.
   *
   * @throws ScanError positioned at the offending character otherwise.
   */
  private assertChar(expected: string) {
    // Remember where the character starts so the error points at it rather
    // than at the character after it.
    const charPos = this.currPos.clone();
    const actual = this.getChar();
    if (actual !== expected) {
      throw new ScanError(this.file, charPos, actual);
    }
  }

  /**
   * Scans a `//` comment including its line terminator (`\n`, `\r` or
   * `\r\n`); the comment may also be ended by the end of the input.
   *
   * @returns The comment text without the leading `//` and without the line
   *          terminator.
   */
  private scanLineComment(): string {
    let text = '';
    this.assertChar('/');
    this.assertChar('/')
    while (true) {
      const c2 = this.peekChar();
      if (c2 === EOF) {
        break;
      }
      if (c2 === '\n') {
        this.getChar();
        break;
      }
      if (c2 === '\r') {
        this.getChar();
        // A CR LF pair is one line terminator.
        if (this.peekChar() === '\n') {
          this.getChar();
        }
        break;
      }
      text += this.getChar();
    }
    return text;
  }

  /**
   * Scans a block comment, from its opening slash-star to the first
   * star-slash that follows.
   *
   * @returns The comment text between the two delimiters.
   * @throws ScanError if the input ends before the comment is closed.
   */
  private scanMultiLineComment(): string {
    let text = '';
    // Consume the opener first so that its '*' cannot be mistaken for the
    // start of the terminator (as in an empty comment).
    this.assertChar('/');
    this.assertChar('*');
    while (true) {
      const c2 = this.getChar();
      if (c2 === EOF) {
        throw new ScanError(this.file, this.currPos.clone(), c2);
      }
      // Only peek at the '/', so a run of stars directly before the closing
      // slash is still recognised as the terminator.
      if (c2 === '*' && this.peekChar() === '/') {
        this.getChar();
        break;
      }
      text += c2;
    }
    return text;
  }

  /**
   * Skips any mix of whitespace, line terminators and comments in front of
   * the next token.
   */
  private skipComments() {
    while (true) {
      const c0 = this.peekChar();
      if (c0 === '/') {
        const c1 = this.peekChar(2);
        if (c1 == '/') {
          this.scanLineComment();
        } else if (c1 === '*') {
          this.scanMultiLineComment();
        } else {
          break;
        }
      } else if (isWhiteSpace(c0) || isJSWhiteSpace(c0) || c0 === '\r') {
        // isWhiteSpace() alone covers neither tabs nor carriage returns.
        this.getChar();
      } else {
        break;
      }
    }
  }

  /**
   * Consumes one hexadecimal digit (either case) and returns its value 0-15.
   *
   * @throws ScanError if the next character is not a hexadecimal digit,
   *         including at the end of the input.
   */
  private scanHexDigit(): number {
    const startPos = this.currPos.clone();
    const c0 = this.getChar();
    if (!/^[0-9a-fA-F]$/.test(c0)) {
      throw new ScanError(this.file, startPos, c0);
    }
    return parseInt(c0, 16);
  }

  /**
   * Scans a unicode escape sequence as it may appear in an identifier: a
   * backslash and `u` followed either by exactly four hex digits or by one
   * or more hex digits in braces.
   *
   * @returns The character (code point) the escape stands for.
   * @throws ScanError on malformed escapes and on code points above U+10FFFF.
   */
  private scanUnicodeEscapeSequence(): string {
    this.assertChar('\\');
    this.assertChar('u');
    let codePoint = 0;
    if (this.peekChar() === '{') {
      this.getChar();
      let digitCount = 0;
      while (this.peekChar() !== '}') {
        const digitPos = this.currPos.clone();
        const digitChar = this.peekChar();
        // scanHexDigit() throws on EOF, so an unclosed brace cannot loop forever.
        codePoint = codePoint * 16 + this.scanHexDigit();
        digitCount++;
        if (codePoint > 0x10FFFF) {
          throw new ScanError(this.file, digitPos, digitChar);
        }
      }
      if (digitCount === 0) {
        // A braced escape without digits: the '}' is what is unexpected.
        throw new ScanError(this.file, this.currPos.clone(), '}');
      }
      this.getChar();
    } else {
      for (let i = 0; i < 4; i++) {
        codePoint = codePoint * 16 + this.scanHexDigit();
      }
    }
    return String.fromCodePoint(codePoint);
  }

  /**
   * Scans and returns the next token, skipping leading whitespace and comments.
   *
   * @throws ScanError if the next character cannot start an identifier; this
   *         includes the end of the input.
   */
  public scan(): JSToken {
    this.skipComments();
    const c0 = this.peekChar();
    const startPos = this.currPos.clone();
    if (!isJSIdentStart(c0)) {
      throw new ScanError(this.file, this.currPos.clone(), c0);
    }
    let name = '';
    while (true) {
      const c1 = this.peekChar();
      if (!isJSIdentPart(c1)) {
        break;
      }
      if (c1 === '\\') {
        name += this.scanUnicodeEscapeSequence();
      } else {
        name += this.getChar();
      }
    }
    const endPos = this.currPos.clone();
    return createJSIdentifier(name, new TextSpan(this.file, startPos, endPos))
  }

  /**
   * Returns the `count`-th upcoming token (1-based) without consuming it.
   */
  public peek(count = 1): JSToken {
    while (this.scanned.length < count) {
      this.scanned.push(this.scan());
    }
    return this.scanned[count - 1];
  }

  /**
   * Consumes and returns the next token, taking previously peeked tokens first.
   */
  public get(): JSToken {
    return this.scanned.length > 0
      ? this.scanned.shift()!
      : this.scan();
  }

}
|
|
|
|
|