overleaf/services/web/public/js/ace/mode/lua/luaparse.js
2014-02-12 10:23:40 +00:00

1989 lines
No EOL
56 KiB
JavaScript
Executable file

define(function(require, exports, module) {
/*global exports:true module:true require:true define:true global:true */
(function (root, name, factory) {
factory(exports)
}(this, 'luaparse', function (exports) {
'use strict';
exports.version = '0.1.4';

// Parser-wide state shared by the lexer and the parser: the source text,
// the active option set, and the bound the lexer compares `index` against.
var input;
var options;
var length;

// Options can be set either globally on the parser object through
// defaultOptions, or during the parse call.
var defaultOptions = {
  // Explicitly tell the parser when the input ends.
  wait: false,
  // Store comments as an array in the chunk object.
  comments: true,
  // Track identifier scopes by adding an isLocal attribute to each
  // identifier-node.
  scope: false,
  // Store location information on each syntax node as
  // `loc: { start: { line, column }, end: { line, column } }`.
  locations: false,
  // Store the start and end character locations on each syntax node as
  // `range: [start, end]`.
  ranges: false
};
exports.defaultOptions = defaultOptions;
// The available tokens expressed as enum flags so they can be checked with
// bitwise operations.
var EOF = 1, StringLiteral = 2, Keyword = 4, Identifier = 8
, NumericLiteral = 16, Punctuator = 32, BooleanLiteral = 64
, NilLiteral = 128, VarargLiteral = 256;
exports.tokenTypes = { EOF: EOF, StringLiteral: StringLiteral
, Keyword: Keyword, Identifier: Identifier, NumericLiteral: NumericLiteral
, Punctuator: Punctuator, BooleanLiteral: BooleanLiteral
, NilLiteral: NilLiteral, VarargLiteral: VarargLiteral
};
// As this parser is a bit different from Lua's own, the error messages
// will be different in some situations.
//
// Each message is a `sprintf` format string; `%1`, `%2`, ... are replaced
// by the arguments passed to `raise`.
var errors = {
  unexpected: 'Unexpected %1 \'%2\' near \'%3\'',
  expected: '\'%1\' expected near \'%2\'',
  expectedToken: '%1 expected near \'%2\'',
  unfinishedString: 'unfinished string near \'%1\'',
  malformedNumber: 'malformed number near \'%1\''
};
exports.errors = errors;
// ### Abstract Syntax Tree
//
// The default AST structure is inspired by the Mozilla Parser API but can
// easily be customized by overriding these functions.
//
// Every factory returns a fresh plain object whose `type` property names
// the node kind; the remaining properties are the arguments as given.
var ast = {
  // ---- Statements ----

  labelStatement: function (label) {
    return { type: 'LabelStatement', label: label };
  },

  breakStatement: function () {
    return { type: 'BreakStatement' };
  },

  gotoStatement: function (label) {
    return { type: 'GotoStatement', label: label };
  },

  returnStatement: function (args) {
    return { type: 'ReturnStatement', 'arguments': args };
  },

  ifStatement: function (clauses) {
    return { type: 'IfStatement', clauses: clauses };
  },

  ifClause: function (condition, body) {
    return { type: 'IfClause', condition: condition, body: body };
  },

  elseifClause: function (condition, body) {
    return { type: 'ElseifClause', condition: condition, body: body };
  },

  elseClause: function (body) {
    return { type: 'ElseClause', body: body };
  },

  whileStatement: function (condition, body) {
    return { type: 'WhileStatement', condition: condition, body: body };
  },

  doStatement: function (body) {
    return { type: 'DoStatement', body: body };
  },

  repeatStatement: function (condition, body) {
    return { type: 'RepeatStatement', condition: condition, body: body };
  },

  localStatement: function (variables, init) {
    return { type: 'LocalStatement', variables: variables, init: init };
  },

  assignmentStatement: function (variables, init) {
    return { type: 'AssignmentStatement', variables: variables, init: init };
  },

  callStatement: function (expression) {
    return { type: 'CallStatement', expression: expression };
  },

  // Note that the node type is 'FunctionDeclaration' and that `isLocal`
  // precedes `parameters` in the produced object.
  functionStatement: function (identifier, parameters, isLocal, body) {
    return {
      type: 'FunctionDeclaration',
      identifier: identifier,
      isLocal: isLocal,
      parameters: parameters,
      body: body
    };
  },

  forNumericStatement: function (variable, start, end, step, body) {
    return {
      type: 'ForNumericStatement',
      variable: variable,
      start: start,
      end: end,
      step: step,
      body: body
    };
  },

  forGenericStatement: function (variables, iterators, body) {
    return {
      type: 'ForGenericStatement',
      variables: variables,
      iterators: iterators,
      body: body
    };
  },

  chunk: function (body) {
    return { type: 'Chunk', body: body };
  },

  // ---- Expressions ----

  identifier: function (name) {
    return { type: 'Identifier', name: name };
  },

  // `type` is one of the token type flags; anything that is not a string,
  // numeric, boolean or nil literal is reported as a 'VarargLiteral'.
  literal: function (type, value, raw) {
    var name;
    switch (type) {
      case StringLiteral: name = 'StringLiteral'; break;
      case NumericLiteral: name = 'NumericLiteral'; break;
      case BooleanLiteral: name = 'BooleanLiteral'; break;
      case NilLiteral: name = 'NilLiteral'; break;
      default: name = 'VarargLiteral';
    }
    return { type: name, value: value, raw: raw };
  },

  tableKey: function (key, value) {
    return { type: 'TableKey', key: key, value: value };
  },

  tableKeyString: function (key, value) {
    return { type: 'TableKeyString', key: key, value: value };
  },

  tableValue: function (value) {
    return { type: 'TableValue', value: value };
  },

  tableConstructorExpression: function (fields) {
    return { type: 'TableConstructorExpression', fields: fields };
  },

  // `and` / `or` produce a 'LogicalExpression'; every other operator
  // produces a 'BinaryExpression'.
  binaryExpression: function (operator, left, right) {
    var isLogical = 'and' === operator || 'or' === operator;
    return {
      type: isLogical ? 'LogicalExpression' : 'BinaryExpression',
      operator: operator,
      left: left,
      right: right
    };
  },

  unaryExpression: function (operator, argument) {
    return { type: 'UnaryExpression', operator: operator, argument: argument };
  },

  memberExpression: function (base, indexer, identifier) {
    return {
      type: 'MemberExpression',
      indexer: indexer,
      identifier: identifier,
      base: base
    };
  },

  indexExpression: function (base, index) {
    return { type: 'IndexExpression', base: base, index: index };
  },

  callExpression: function (base, args) {
    return { type: 'CallExpression', base: base, 'arguments': args };
  },

  tableCallExpression: function (base, args) {
    return { type: 'TableCallExpression', base: base, 'arguments': args };
  },

  stringCallExpression: function (base, argument) {
    return { type: 'StringCallExpression', base: base, argument: argument };
  },

  // ---- Comments ----

  comment: function (value, raw) {
    return { type: 'Comment', value: value, raw: raw };
  }
};
exports.ast = ast;
// Wrap up the node object.
//
// When location tracking is active, pop a `Marker` off the location-array,
// complete it and attach its location data to `node`. Returns `node`.
//
// The location data is *copied* onto the node. The same `Marker` instance
// can sit on the location-array more than once (parseIfStatement pushes the
// IfStatement's marker a second time for its first IfClause), and
// `complete()` mutates the marker in place. Sharing the marker's `loc` and
// `range` objects would let a later completion overwrite the end position
// of a node that was finished earlier.
function finishNode(node) {
  if (trackLocations) {
    var location = locations.pop();
    location.complete();
    if (options.locations) {
      node.loc = {
        start: {
          line: location.loc.start.line,
          column: location.loc.start.column
        },
        end: {
          line: location.loc.end.line,
          column: location.loc.end.column
        }
      };
    }
    if (options.ranges) node.range = [location.range[0], location.range[1]];
  }
  return node;
}
// Helpers
// -------

// Cached references to built-in prototype methods.
var slice = Array.prototype.slice;
var toString = Object.prototype.toString;

// Return the position of the first entry of `array` that is strictly equal
// to `element`, or -1 when there is none.
var indexOf = function indexOf(array, element) {
  var total = array.length;
  var position = 0;
  while (position < total) {
    if (array[position] === element) return position;
    position++;
  }
  return -1;
};
// Search an array of objects for the first entry whose `property` is
// strictly equal to `element`. Returns its position, or -1 if no entry
// matches.
function indexOfObject(array, property, element) {
  var total = array.length;
  var position = 0;
  while (position < total) {
    if (array[position][property] === element) return position;
    position++;
  }
  return -1;
}
// A sprintf implementation using %index (beginning at 1) to input
// arguments in the format string. Only single-digit indexes are recognised.
//
// A placeholder whose argument was not supplied is replaced by the empty
// string; every other value is converted with string concatenation.
//
// Example:
//
// // Unexpected function in token
// sprintf('Unexpected %2 in %1.', 'token', 'function');
function sprintf(format) {
  var args = slice.call(arguments, 1);
  return format.replace(/%(\d)/g, function (match, position) {
    var value = args[position - 1];
    // `'' + value || ''` would bind as `('' + value) || ''`, which turns a
    // missing argument into the text "undefined". Test the value itself.
    return 'undefined' === typeof value ? '' : '' + value;
  });
}
// Build a new object holding the own enumerable properties of every object
// passed as an argument. Arguments are applied left to right, so the last
// argument takes precedence. The arguments themselves are not modified.
//
// Example:
//
// this.options = extend(options, { output: false });
function extend() {
  var sources = slice.call(arguments);
  var result = {};
  for (var s = 0; s < sources.length; s++) {
    var source = sources[s];
    for (var key in source) {
      if (!source.hasOwnProperty(key)) continue;
      result[key] = source[key];
    }
  }
  return result;
}
// ### Error functions
// #### Raise an exception.
//
// Throws a SyntaxError built from a token, a `sprintf` format string and
// its parameters.
//
// If the passed token carries a `line`, its location is used for the
// message prefix and for the `line`, `index` and `column` properties of
// the error. Otherwise the lexer's current position is used.
//
// Example:
//
// // [1:0] expected [ near (
// raise(token, "expected %1 near %2", '[', token.value);
function raise(token) {
  var message = sprintf.apply(null, slice.call(arguments, 1));
  var column, error;

  // No location on the token: fall back to the lexer position.
  if ('undefined' === typeof token.line) {
    column = index - lineStart + 1;
    error = new SyntaxError(sprintf('[%1:%2] %3', line, column, message));
    error.index = index;
    error.line = line;
    error.column = column;
    throw error;
  }

  column = token.range[0] - token.lineStart;
  error = new SyntaxError(sprintf('[%1:%2] %3', token.line, column, message));
  error.line = token.line;
  error.index = token.range[0];
  error.column = column;
  throw error;
}
// #### Raise an unexpected token error.
//
// Reports that `type` (a description such as '<name>') was expected near
// the value of `token`, using the `expectedToken` message.
//
// Example:
//
// // expected <name> near '0'
// raiseUnexpectedToken('<name>', token);
function raiseUnexpectedToken(type, token) {
  var near = token.value;
  raise(token, errors.expectedToken, type, near);
}
// #### Raise a general unexpected error
//
// `found` is either a token object or a symbol string that was not
// expected. `near` optionally names a nearby token such as <eof>; when it
// is omitted it defaults to the value of the lookahead token.
//
// Example:
//
// // Unexpected symbol 'end' near '<eof>'
// unexpected(token);
//
// If there's no token in the buffer it means we have reached <eof>. This
// also covers the lexer reporting a bad character before any lookahead
// token exists, which would otherwise fail on `lookahead.value`.
//
// Token types without a dedicated description (vararg and <eof> tokens)
// are reported as 'symbol' rather than leaking `undefined` into the text.
function unexpected(found, near) {
  if ('undefined' === typeof near) near = lookahead ? lookahead.value : '<eof>';
  if ('undefined' !== typeof found.type) {
    var type = 'symbol';
    switch (found.type) {
      case StringLiteral: type = 'string'; break;
      case Keyword: type = 'keyword'; break;
      case Identifier: type = 'identifier'; break;
      case NumericLiteral: type = 'number'; break;
      case Punctuator: type = 'symbol'; break;
      case BooleanLiteral: type = 'boolean'; break;
      case NilLiteral:
        // The token value is `null`; spell it the way Lua does.
        return raise(found, errors.unexpected, 'symbol', 'nil', near);
    }
    return raise(found, errors.unexpected, type, found.value, near);
  }
  // `found` is a plain string coming straight from the lexer.
  return raise(found, errors.unexpected, 'symbol', found, near);
}
// Lexer
// -----
//
// The lexer, or the tokenizer, reads the input string character by
// character and derives a token left-right. To be as efficient as possible
// the lexer prioritizes the common cases such as identifiers. It also works
// with character codes instead of characters, as string comparisons were
// the biggest bottleneck of the parser.
//
// If `options.comments` is enabled, all comments encountered will be stored
// in an array which later will be appended to the chunk object. If disabled,
// they will simply be disregarded.
//
// When the lexer has derived a valid token, it will be returned as an object
// containing its value as well as its position in the input string (this
// is always enabled to provide proper debug messages).
//
// `lex()` starts lexing and returns the following token in the stream.

// Current offset into `input`.
var index;
// The token currently being parsed.
var token;
// The token that was current before the last call to `next()`.
var previousToken;
// The token following `token`.
var lookahead;
// Comment nodes collected by `scanComment`.
var comments;
// Offset at which the token being scanned begins.
var tokenStart;
// Current line number.
var line;
// Offset at which the current line begins.
var lineStart;

exports.lex = lex;
// Skip whitespace and comments, then scan and return the next token.
//
// Returns an <eof> token once the end of the input is reached. Otherwise
// the first character (plus one or two characters of lookahead) decides
// which scanner handles the token. Characters that cannot start a token
// are reported through `unexpected`.
function lex() {
  skipWhiteSpace();

  // Skip comments beginning with --
  while (45 === input.charCodeAt(index) &&
         45 === input.charCodeAt(index + 1)) {
    scanComment();
    skipWhiteSpace();
  }

  if (index >= length) {
    return {
      type: EOF,
      value: '<eof>',
      line: line,
      lineStart: lineStart,
      range: [index, index]
    };
  }

  var code = input.charCodeAt(index);
  var peek = input.charCodeAt(index + 1);

  // Memorize the range index where the token begins.
  tokenStart = index;

  if (isIdentifierStart(code)) return scanIdentifierOrKeyword();

  // 0-9
  if (code >= 48 && code <= 57) return scanNumericLiteral();

  switch (code) {
    case 39: case 34: // '"
      return scanStringLiteral();

    case 46: // .
      // If the dot is followed by a digit it's a float.
      if (isDecDigit(peek)) return scanNumericLiteral();
      if (46 !== peek) return scanPunctuator('.');
      return (46 === input.charCodeAt(index + 2)) ?
        scanVarargLiteral() :
        scanPunctuator('..');

    case 61: // =
      return scanPunctuator(61 === peek ? '==' : '=');

    case 62: // >
      return scanPunctuator(61 === peek ? '>=' : '>');

    case 60: // <
      return scanPunctuator(61 === peek ? '<=' : '<');

    case 126: // ~ is only valid as part of ~=
      return (61 === peek) ?
        scanPunctuator('~=') :
        raise({}, errors.expected, '=', '~');

    case 58: // :
      return scanPunctuator(58 === peek ? '::' : ':');

    case 91: // [
      // Check for a multiline string, they begin with [= or [[
      return (91 === peek || 61 === peek) ?
        scanLongStringLiteral() :
        scanPunctuator('[');

    // \* / ^ % , { } ] ( ) ; # - +
    case 42: case 47: case 94: case 37: case 44: case 123: case 125:
    case 93: case 40: case 41: case 59: case 35: case 45: case 43:
      return scanPunctuator(input.charAt(index));
  }

  return unexpected(input.charAt(index));
}
// Whitespace has no semantic meaning in Lua so simply skip ahead while
// tracking the encountered newlines. Newlines are also tracked in all
// token functions where multiline values are allowed.
//
// Advances `index` past whitespace and line terminators, incrementing
// `line` and moving `lineStart` to the first character of each new line.
// A two-character line break made of two *different* terminators (`\r\n`
// or `\n\r`) counts as a single line, which is how Lua itself counts
// lines. `\n\n` and `\r\r` are still two lines.
function skipWhiteSpace() {
  var charCode, peekCharCode;
  while (index < length) {
    charCode = input.charCodeAt(index);
    if (isWhiteSpace(charCode)) {
      index++;
    } else if (isLineTerminator(charCode)) {
      // Swallow the second half of a \r\n or \n\r pair so that it is not
      // counted as a line of its own.
      peekCharCode = input.charCodeAt(index + 1);
      if (isLineTerminator(peekCharCode) && peekCharCode !== charCode) index++;
      line++;
      lineStart = ++index;
    } else {
      break;
    }
  }
}
// Identifiers, keywords, booleans and nil all look the same syntax wise.
// Scan the whole word first, then classify it, defaulting to an identifier
// when nothing else matches.
//
// Boolean words yield a real boolean as token value and `nil` yields
// `null`; keywords and identifiers keep their text.
function scanIdentifierOrKeyword() {
  // Slicing the input string is preferred over string concatenation in a
  // loop for performance reasons.
  do {
    index++;
  } while (isIdentifierPart(input.charCodeAt(index)));

  var text = input.slice(tokenStart, index);
  var type = Identifier;
  var value = text;

  // Decide on the token type and possibly cast the value.
  if (isKeyword(text)) {
    type = Keyword;
  } else if ('true' === text || 'false' === text) {
    type = BooleanLiteral;
    value = ('true' === text);
  } else if ('nil' === text) {
    type = NilLiteral;
    value = null;
  }

  return {
    type: type,
    value: value,
    line: line,
    lineStart: lineStart,
    range: [tokenStart, index]
  };
}
// Once a punctuator reaches this function it should already have been
// validated, so simply step over it and return it as a token.
function scanPunctuator(value) {
  index += value.length;
  var punctuator = {
    type: Punctuator,
    value: value,
    line: line,
    lineStart: lineStart,
    range: [tokenStart, index]
  };
  return punctuator;
}
// A vararg literal consists of three dots. The caller has already checked
// that they are present, so step over them and build the token.
function scanVarargLiteral() {
  index += 3;
  var vararg = {
    type: VarargLiteral,
    value: '...',
    line: line,
    lineStart: lineStart,
    range: [tokenStart, index]
  };
  return vararg;
}
// Find the string literal by matching the delimiter marks used.
//
// Scans a single- or double-quoted string starting at `index` and returns
// a StringLiteral token whose value has its escape sequences translated by
// `readEscapeSequence`.
//
// Raises `unfinishedString` when the line or the input ends before the
// closing delimiter. That includes input running out right after the
// opening quote or right after an escape sequence, where the scan loop ends
// without ever reaching the in-loop end-of-input check.
function scanStringLiteral() {
  var delimiter = input.charCodeAt(index++)
    , stringStart = index
    , string = ''
    , terminated = false
    , charCode;

  while (index < length) {
    charCode = input.charCodeAt(index++);
    if (delimiter === charCode) {
      terminated = true;
      break;
    }
    if (92 === charCode) { // \
      string += input.slice(stringStart, index - 1) + readEscapeSequence();
      stringStart = index;
    }
    // EOF or `\n` terminates a string literal. If we haven't found the
    // ending delimiter by now, raise an exception.
    else if (index >= length || isLineTerminator(charCode)) {
      string += input.slice(stringStart, index - 1);
      raise({}, errors.unfinishedString, string + String.fromCharCode(charCode));
    }
  }

  // The loop ran out of input without seeing the closing delimiter.
  if (!terminated) {
    raise({}, errors.unfinishedString, string + input.slice(stringStart, index));
  }

  // `index - 1` excludes the closing delimiter.
  string += input.slice(stringStart, index - 1);

  return {
    type: StringLiteral
    , value: string
    , line: line
    , lineStart: lineStart
    , range: [tokenStart, index]
  };
}
// Expect a multiline string literal and return it as a regular string
// literal. If it doesn't validate into a valid multiline string, throw an
// exception.
//
// The failure is reported at the lexer's own position (an empty token is
// passed to `raise`). The parser's `token` variable is not maintained by
// the lexer: while a token is being scanned it is either unset or refers
// to an earlier token, so it cannot supply the location or the `near` text.
function scanLongStringLiteral() {
  var string = readLongString();

  // Fail if it's not a multiline literal.
  if (false === string) {
    // Quote the opening bracket together with any `=` that follow it,
    // e.g. "'[' expected near '[=='".
    var delimiterEnd = index;
    while ('=' === input.charAt(delimiterEnd)) delimiterEnd++;
    raise({}, errors.expected, '[', input.slice(tokenStart, delimiterEnd));
  }

  return {
    type: StringLiteral
    , value: string
    , line: line
    , lineStart: lineStart
    , range: [tokenStart, index]
  };
}
// Numeric literals will be returned as floating-point numbers instead of
// strings. The raw value should be retrieved from slicing the input string
// later on in the process.
//
// A literal starting with `0x` or `0X` is read as hexadecimal and
// converted; everything else is read as a decimal literal.
function scanNumericLiteral() {
  var isHex = '0' === input.charAt(index) &&
    'xX'.indexOf(input.charAt(index + 1) || null) >= 0;
  var value = isHex ? readHexLiteral() : readDecLiteral();

  return {
    type: NumericLiteral,
    value: value,
    line: line,
    lineStart: lineStart,
    range: [tokenStart, index]
  };
}
// Lua hexadecimals have an optional fraction part and an optional binary
// exponent part. These are not included in JavaScript, so the three parts
// are computed separately and combined at the end of the function with the
// following algorithm.
//
// Digit := toDec(digit)
// Fraction := toDec(fraction) / 16 ^ fractionCount
// BinaryExp := 2 ^ binaryExp
// Number := ( Digit + Fraction ) * BinaryExp
//
// Expects `index` to point at the leading `0`. Raises `malformedNumber`
// when no hex digit follows the `0x` prefix or when the exponent marker is
// not followed by a decimal digit.
function readHexLiteral() {
  var fraction = 0;        // summed, so an absent fraction is 0
  var binaryExponent = 1;  // multiplied, so an absent exponent is 1
  var binarySign = 1;      // positive unless an explicit `-` is read

  index += 2; // Skip 0x part
  var digitStart = index;

  // A minimum of one hex digit is required.
  if (!isHexDigit(input.charCodeAt(index)))
    raise({}, errors.malformedNumber, input.slice(tokenStart, index));

  while (isHexDigit(input.charCodeAt(index))) index++;
  // Convert the hexadecimal digits to base 10.
  var digit = parseInt(input.slice(digitStart, index), 16);

  // Fraction part is optional.
  if ('.' === input.charAt(index)) {
    var fractionStart = ++index;
    while (isHexDigit(input.charCodeAt(index))) index++;
    var fractionCount = index - fractionStart;
    // An empty fraction stays 0; others are scaled into 0.x form so that
    // they can simply be added to the integer part.
    if (fractionCount !== 0) {
      fraction = parseInt(input.slice(fractionStart, index), 16) /
        Math.pow(16, fractionCount);
    }
  }

  // Binary exponents are optional.
  if ('pP'.indexOf(input.charAt(index) || null) >= 0) {
    index++;

    // Sign part is optional and defaults to 1 (positive).
    var sign = input.charAt(index);
    if ('+-'.indexOf(sign || null) >= 0) {
      binarySign = ('+' === sign) ? 1 : -1;
      index++;
    }

    var exponentStart = index;
    // The binary exponent requires at least one decimal digit.
    if (!isDecDigit(input.charCodeAt(index)))
      raise({}, errors.malformedNumber, input.slice(tokenStart, index));

    while (isDecDigit(input.charCodeAt(index))) index++;

    // Calculate the binary exponent of the number.
    binaryExponent = Math.pow(2, input.slice(exponentStart, index) * binarySign);
  }

  return (digit + fraction) * binaryExponent;
}
// Decimal numbers are exactly the same in Lua and in JavaScript. Because
// of this, only the end of the literal has to be found; the text between
// `tokenStart` and that point is then parsed with the native `parseFloat`.
//
// Raises `malformedNumber` when an exponent marker is not followed by at
// least one decimal digit.
function readDecLiteral() {
  // Integer part.
  while (isDecDigit(input.charCodeAt(index))) index++;

  // Fraction part is optional and may be empty.
  if ('.' === input.charAt(index)) {
    index++;
    while (isDecDigit(input.charCodeAt(index))) index++;
  }

  // Exponent part is optional.
  var hasExponent = 'eE'.indexOf(input.charAt(index) || null) >= 0;
  if (hasExponent) {
    index++;
    // Sign part is optional.
    if ('+-'.indexOf(input.charAt(index) || null) >= 0) index++;
    // An exponent is required to contain at least one decimal digit.
    if (!isDecDigit(input.charCodeAt(index)))
      raise({}, errors.malformedNumber, input.slice(tokenStart, index));
    while (isDecDigit(input.charCodeAt(index))) index++;
  }

  var text = input.slice(tokenStart, index);
  return parseFloat(text);
}
// Translate an escape sequence to the actual characters.
//
// Expects `index` to point at the character right after the backslash and
// leaves it after the sequence. Returns the text the sequence stands for.
//
// * `\n \r \t \v \b \f` are translated to their control characters (the
//   bell sequence is not handled here).
// * `\z` skips the following span of white-space and yields nothing.
// * `\xXX` and `\ddd` byte escapes are returned as is, backslash included.
// * Any other character is returned without the backslash.
function readEscapeSequence() {
  var sequenceStart = index;
  var character = input.charAt(index);

  // Single-character escapes that map directly onto a control character.
  var simple = {
    n: '\n', r: '\r', t: '\t', v: '\x0B', b: '\b', f: '\f'
  };
  if (simple.hasOwnProperty(character)) {
    index++;
    return simple[character];
  }

  if ('z' === character) {
    index++;
    skipWhiteSpace();
    return '';
  }

  if ('x' === character) {
    // \xXX, where XX is a sequence of exactly two hexadecimal digits
    var hasTwoHexDigits = isHexDigit(input.charCodeAt(index + 1)) &&
      isHexDigit(input.charCodeAt(index + 2));
    if (hasTwoHexDigits) {
      index += 3;
      // Return it as is, without translating the byte.
      return '\\' + input.slice(sequenceStart, index);
    }
    return '\\' + input.charAt(index++);
  }

  // \ddd: a run of decimal digits, returned untranslated.
  if (isDecDigit(input.charCodeAt(index))) {
    do {
      index++;
    } while (isDecDigit(input.charCodeAt(index)));
    return '\\' + input.slice(sequenceStart, index);
  }

  // The backslash is not escaping any known sequence; return the character
  // on its own.
  return input.charAt(index++);
}
// Comments begin with -- after which it will be decided if they are
// multiline comments or not.
//
// The multiline functionality works the exact same way as with string
// literals so we reuse the functionality.
//
// Expects `index` to point at the leading `--` and leaves it at the end of
// the comment. When `options.comments` is enabled a Comment node is
// appended to `comments`; otherwise the comment is only skipped.
function scanComment() {
  tokenStart = index;
  index += 2; // --

  var opener = input.charAt(index);
  var bodyStart = index;
  // Remember where the comment starts; a long comment moves `line` and
  // `lineStart` while it is being read.
  var startLine = line;
  var startLineStart = lineStart;
  var text = '';
  var isLong = false;

  if ('[' === opener) {
    var longText = readLongString();
    if (false === longText) {
      // This wasn't a multiline comment after all.
      text = opener;
    } else {
      text = longText;
      isLong = true;
    }
  }

  // Scan until next line as long as it's not a multiline comment.
  if (!isLong) {
    while (index < length && !isLineTerminator(input.charCodeAt(index))) {
      index++;
    }
    if (options.comments) text = input.slice(bodyStart, index);
  }

  if (!options.comments) return;

  var node = ast.comment(text, input.slice(tokenStart, index));

  // `Marker`s depend on tokens available in the parser and as comments are
  // intercepted in the lexer all location data is set manually.
  if (options.locations) {
    node.loc = {
      start: { line: startLine, column: tokenStart - startLineStart },
      end: { line: line, column: index - lineStart }
    };
  }
  if (options.ranges) {
    node.range = [tokenStart, index];
  }
  comments.push(node);
}
// Read a multiline string by calculating the depth of `=` characters and
// then appending until an equal depth is found.
//
// Expects `index` to point at the opening `[`. Returns the content between
// the delimiters and leaves `index` after the closing delimiter. Returns
// `false`, with `index` advanced by exactly one, when the bracket does not
// open a long string.
//
// Line breaks are tracked here because `skipWhiteSpace()` never sees this
// part of the input. `\r\n` and `\n\r` count as one line break, and
// `lineStart` always points at the first character *after* a line break.
//
// If the input ends before the closing delimiter, everything up to the end
// of the input is returned and `index` is left at the end of the input.
// The `errors` table has no message for this case, so it is not raised.
function readLongString() {
  var level = 0
    , terminator = false
    , charCode, peekCharCode, stringStart, i;

  index++; // [

  // Calculate the depth of the delimiter.
  while ('=' === input.charAt(index + level)) level++;
  // Exit, this is not a long string after all.
  if ('[' !== input.charAt(index + level)) return false;

  index += level + 1;

  // If the first character is a newline, ignore it and begin on next line.
  charCode = input.charCodeAt(index);
  if (isLineTerminator(charCode)) {
    peekCharCode = input.charCodeAt(index + 1);
    if (isLineTerminator(peekCharCode) && peekCharCode !== charCode) index++;
    line++;
    lineStart = ++index;
  }

  stringStart = index;
  while (index < length) {
    charCode = input.charCodeAt(index++);

    if (isLineTerminator(charCode)) {
      // Step over the second half of a two-character line break so that it
      // is not counted again. It stays part of the content, which is
      // sliced from the input below.
      peekCharCode = input.charCodeAt(index);
      if (isLineTerminator(peekCharCode) && peekCharCode !== charCode) index++;
      line++;
      lineStart = index;
    } else if (93 === charCode) { // ]
      // Once the delimiter is found, iterate through the depth count and
      // see if it matches.
      terminator = true;
      for (i = 0; i < level; i++) {
        if ('=' !== input.charAt(index + i)) terminator = false;
      }
      if (']' !== input.charAt(index + level)) terminator = false;
      // We reached the end of the multiline string. Get out now.
      if (terminator) break;
    }
  }

  // Ran out of input: keep every character and stay inside the input.
  if (!terminator) return input.slice(stringStart, index);

  // `index - 1` excludes the `]` that started the closing delimiter.
  var content = input.slice(stringStart, index - 1);
  index += level + 1;
  return content;
}
// ## Lex functions and helpers.

// Read the next token.
//
// The token window is shifted by one: the current token becomes the
// previous token, the lookahead becomes the current token, and a new
// lookahead token is read from the lexer.
function next() {
  var upcoming = lookahead;
  previousToken = token;
  token = upcoming;
  lookahead = lex();
}
// Consume the current token if its value matches `value`. Returns whether
// the token was consumed.
//
// String literal tokens never match. Their value is the content of the
// string, so comparing by value alone would let the literal ";" or "end"
// pass for the punctuator or keyword with the same spelling.
function consume(value) {
  if (value === token.value && StringLiteral !== token.type) {
    next();
    return true;
  }
  return false;
}
// Expect the current token's value to match `value` and advance past it.
// If it does not match, raise an `expected` error for the current token.
//
// String literal tokens never match: the literal "end" is a string, not
// the keyword that closes a block.
function expect(value) {
  if (value === token.value && StringLiteral !== token.type) next();
  else raise(token, errors.expected, value, token.value);
}
// ### Validation functions

// Is the character code a tab (9), vertical tab (0xB), form feed (0xC) or
// space (32)? Line terminators are handled by `isLineTerminator`.
function isWhiteSpace(charCode) {
  switch (charCode) {
    case 9: case 0xB: case 0xC: case 32:
      return true;
  }
  return false;
}
// Is the character code a line feed (10) or a carriage return (13)?
function isLineTerminator(charCode) {
  if (10 === charCode) return true;
  return 13 === charCode;
}
// Is the character code one of the decimal digits 0-9 (48-57)?
function isDecDigit(charCode) {
  var atLeastZero = charCode >= 48;
  return atLeastZero && charCode <= 57;
}
// Is the character code a hexadecimal digit: 0-9, a-f or A-F?
function isHexDigit(charCode) {
  if (charCode >= 48 && charCode <= 57) return true;   // 0-9
  if (charCode >= 97 && charCode <= 102) return true;  // a-f
  return charCode >= 65 && charCode <= 70;             // A-F
}
// From [Lua 5.2](http://www.lua.org/manual/5.2/manual.html#8.1) onwards
// identifiers cannot use locale-dependent letters.
//
// Can the character code start an identifier: A-Z, a-z or underscore?
function isIdentifierStart(charCode) {
  if (charCode >= 65 && charCode <= 90) return true;   // A-Z
  if (charCode >= 97 && charCode <= 122) return true;  // a-z
  return 95 === charCode;                              // _
}
// Can the character code continue an identifier: A-Z, a-z, underscore or
// a decimal digit?
function isIdentifierPart(charCode) {
  if (charCode >= 65 && charCode <= 90) return true;   // A-Z
  if (charCode >= 97 && charCode <= 122) return true;  // a-z
  if (95 === charCode) return true;                    // _
  return charCode >= 48 && charCode <= 57;             // 0-9
}
// [3.1 Lexical Conventions](http://www.lua.org/manual/5.2/manual.html#3.1)
//
// `true`, `false` and `nil` will not be considered keywords, but literals.
//
// The word length is checked first so that only the keywords of that
// length have to be compared.
function isKeyword(id) {
  var size = id.length;
  if (2 === size)
    return 'do' === id || 'if' === id || 'in' === id || 'or' === id;
  if (3 === size)
    return 'and' === id || 'end' === id || 'for' === id || 'not' === id;
  if (4 === size)
    return 'else' === id || 'goto' === id || 'then' === id;
  if (5 === size)
    return 'break' === id || 'local' === id || 'until' === id || 'while' === id;
  if (6 === size)
    return 'elseif' === id || 'repeat' === id || 'return' === id;
  if (8 === size)
    return 'function' === id;
  return false;
}
// Is the token a unary operator: the punctuators `#` and `-`, or the
// keyword `not`?
function isUnary(token) {
  switch (token.type) {
    case Punctuator:
      return '#-'.indexOf(token.value) >= 0;
    case Keyword:
      return 'not' === token.value;
    default:
      return false;
  }
}
// @TODO this needs to be rethought.
//
// Is the expression node one of the three call node types?
function isCallExpression(expression) {
  var type = expression.type;
  return 'CallExpression' === type ||
    'TableCallExpression' === type ||
    'StringCallExpression' === type;
}
// Check if the token syntactically closes a block: either <eof> or one of
// the keywords `else`, `elseif`, `end` and `until`.
function isBlockFollow(token) {
  if (EOF === token.type) return true;
  if (Keyword !== token.type) return false;
  var word = token.value;
  return 'else' === word || 'elseif' === word ||
    'end' === word || 'until' === word;
}
// Scope
// -----

// Each block scope is stored as an array of identifier names, and the
// scopes themselves are kept in a FILO-array.
var scopes;
// The current scope index
var scopeDepth;
// A list of all global identifier nodes.
var globals;
// Create a new scope inheriting all declarations from the previous scope.
// The new scope starts as a copy of the current scope's names and becomes
// the current scope.
function createScope() {
  var parentScope = scopes[scopeDepth];
  scopeDepth++;
  scopes.push(Array.apply(null, parentScope));
}
// Exit and remove the current scope, making its parent current again.
function exitScope() {
  scopeDepth -= 1;
  scopes.pop();
}
// Add identifier name to the current scope if it doesn't already exist.
function scopeIdentifierName(name) {
  var current = scopes[scopeDepth];
  if (-1 === indexOf(current, name)) current.push(name);
}
// Add an identifier node to the current scope: register its name and mark
// the node as local.
function scopeIdentifier(node) {
  var name = node.name;
  scopeIdentifierName(name);
  attachScope(node, true);
}
// Attach scope information to node by setting its `isLocal` property. If
// the node is global and no node with the same name is stored yet, store
// it in the globals array so we can return the information to the user.
function attachScope(node, isLocal) {
  if (!isLocal) {
    var isNewGlobal = -1 === indexOfObject(globals, 'name', node.name);
    if (isNewGlobal) globals.push(node);
  }
  node.isLocal = isLocal;
}
// Is the identifier name available in the current scope?
function scopeHasName(name) {
  var position = indexOf(scopes[scopeDepth], name);
  return position !== -1;
}
// Location tracking
// -----------------
//
// Locations are stored in a FILO-array as `Marker` objects consisting of
// both `loc` and `range` data. Once a `Marker` is popped off the list an
// end location is added and the data is attached to a syntax node.
var locations = [];
// Whether markers are pushed and popped at all.
var trackLocations;
// Create a `Marker` that starts at the current token.
function createLocationMarker() {
  var marker = new Marker(token);
  return marker;
}
// A `Marker` records where a syntax node starts, taken from `token`.
//
// Depending on the options it carries `loc` (line/column, with the end
// still zeroed), `range` (start offset, with the end still zero), both or
// neither.
function Marker(token) {
  if (options.locations) {
    var start = {
      line: token.line,
      column: token.range[0] - token.lineStart
    };
    var end = { line: 0, column: 0 };
    this.loc = { start: start, end: end };
  }
  if (options.ranges) this.range = [token.range[0], 0];
}

// Complete the location data stored in the `Marker` by adding the location
// of the *previous token* as an end location.
Marker.prototype.complete = function () {
  if (options.locations) {
    var end = this.loc.end;
    end.line = previousToken.line;
    end.column = previousToken.range[1] - previousToken.lineStart;
  }
  if (options.ranges) {
    this.range[1] = previousToken.range[1];
  }
};
// Create a new `Marker` for the current token and add it to the
// FILO-array. Does nothing unless locations are tracked.
function markLocation() {
  if (!trackLocations) return;
  locations.push(createLocationMarker());
}
// Push an arbitrary `Marker` object onto the FILO-array. Does nothing
// unless locations are tracked.
function pushLocation(marker) {
  if (!trackLocations) return;
  locations.push(marker);
}
// Parse functions
// ---------------

// Chunk is the main program object. Syntactically it's the same as a block.
//
// chunk ::= block
//
// Anything left over after the block that is not <eof> is reported as
// unexpected.
function parseChunk() {
  next();
  markLocation();
  var body = parseBlock();
  if (EOF !== token.type) unexpected(token);

  // If the body is empty no previousToken exists when finishNode runs, so
  // let the <eof> token stand in for it.
  var isEmpty = !body.length;
  if (trackLocations && isEmpty) previousToken = token;

  var chunk = ast.chunk(body);
  return finishNode(chunk);
}
// A block contains a list of statements with an optional return statement
// as its last statement.
//
// block ::= {stat} [retstat]
//
// Returns the statements as a plain array; a block has no AST node of its
// own. With `options.scope` enabled the block is parsed inside a scope of
// its own.
function parseBlock(terminator) {
  var statements = [];

  // Each block creates a new scope.
  if (options.scope) createScope();

  while (!isBlockFollow(token)) {
    // Decide before parsing; parsing moves `token` forward.
    var isReturn = 'return' === token.value;
    var statement = parseStatement();

    // Return has to be the last statement in a block.
    if (isReturn) {
      statements.push(statement);
      break;
    }
    // Statements are only added if they are returned, this allows us to
    // ignore some statements, such as EmptyStatement.
    if (statement) statements.push(statement);
  }

  if (options.scope) exitScope();
  return statements;
}
// There are two types of statements, simple and compound.
//
// statement ::= break | goto | do | while | repeat | return
// | if | for | function | local | label | assignment
// | functioncall | ';'
//
// Dispatches on the current token. For statements introduced by a keyword
// the keyword is consumed here and the matching parse function handles the
// rest. Returns the statement node, or nothing for an empty `;` statement.
function parseStatement() {
  markLocation();

  // Only keyword tokens may select a keyword statement.
  var keyword = (Keyword === token.type) ? token.value : null;

  if ('local' === keyword) { next(); return parseLocalStatement(); }
  if ('if' === keyword) { next(); return parseIfStatement(); }
  if ('return' === keyword) { next(); return parseReturnStatement(); }
  if ('function' === keyword) {
    next();
    var name = parseFunctionName();
    return parseFunctionDeclaration(name);
  }
  if ('while' === keyword) { next(); return parseWhileStatement(); }
  if ('for' === keyword) { next(); return parseForStatement(); }
  if ('repeat' === keyword) { next(); return parseRepeatStatement(); }
  if ('break' === keyword) { next(); return parseBreakStatement(); }
  if ('do' === keyword) { next(); return parseDoStatement(); }
  if ('goto' === keyword) { next(); return parseGotoStatement(); }

  if (Punctuator === token.type && consume('::')) return parseLabelStatement();

  // Assignments memorize the location and push it manually for wrapper
  // nodes. Additionally empty `;` statements should not mark a location.
  if (trackLocations) locations.pop();

  // When a `;` is encountered, simply eat it without storing it.
  if (consume(';')) return;

  return parseAssignmentOrCallStatement();
}
// ## Statements

// label ::= '::' Name '::'
//
// The opening `::` has already been consumed. With scope tracking enabled
// the label is registered in the current scope under the name `::Name::`
// and the identifier node is marked as local.
function parseLabelStatement() {
  var labelName = token.value;
  var label = parseIdentifier();

  if (options.scope) {
    scopeIdentifierName('::' + labelName + '::');
    attachScope(label, true);
  }

  expect('::');
  var node = ast.labelStatement(label);
  return finishNode(node);
}
// break ::= 'break'
//
// The keyword has already been consumed, so only the node is left to
// build.
function parseBreakStatement() {
  var node = ast.breakStatement();
  return finishNode(node);
}
// goto ::= 'goto' Name
//
// With scope tracking enabled the label identifier gets an `isLabel` flag
// telling whether `::Name::` is known in the current scope.
function parseGotoStatement() {
  var labelName = token.value;
  var label = parseIdentifier();

  if (options.scope) {
    label.isLabel = scopeHasName('::' + labelName + '::');
  }

  var node = ast.gotoStatement(label);
  return finishNode(node);
}
// do ::= 'do' block 'end'
//
// The `do` keyword has already been consumed.
function parseDoStatement() {
  var body = parseBlock();
  expect('end');
  var node = ast.doStatement(body);
  return finishNode(node);
}
// while ::= 'while' exp 'do' block 'end'
//
// The `while` keyword has already been consumed.
function parseWhileStatement() {
  var condition = parseExpectedExpression();
  expect('do');
  var body = parseBlock();
  expect('end');
  var node = ast.whileStatement(condition, body);
  return finishNode(node);
}
// repeat ::= 'repeat' block 'until' exp
//
// The `repeat` keyword has already been consumed. Note that the node
// factory takes the condition first although it is parsed last.
function parseRepeatStatement() {
  var body = parseBlock();
  expect('until');
  var condition = parseExpectedExpression();
  var node = ast.repeatStatement(condition, body);
  return finishNode(node);
}
// retstat ::= 'return' [exp {',' exp}] [';']
//
// The `return` keyword has already been consumed. Returns a
// ReturnStatement node holding the (possibly empty) list of expressions.
//
// The expression list is only skipped when the current token is the
// *keyword* `end`. Comparing the value alone would also match the string
// literal "end", so `return "end"` would be parsed as a bare `return`.
function parseReturnStatement() {
  var expressions = [];
  var isEndKeyword = Keyword === token.type && 'end' === token.value;

  if (!isEndKeyword) {
    var expression = parseExpression();
    if (null != expression) expressions.push(expression);
    while (consume(',')) {
      expression = parseExpectedExpression();
      expressions.push(expression);
    }
    consume(';'); // grammar tells us ; is optional here.
  }
  return finishNode(ast.returnStatement(expressions));
}
// Parse an `if` statement together with all of its clauses and return the
// finished IfStatement node.
//
//     if ::= 'if' exp 'then' block {elif} ['else' block] 'end'
//     elif ::= 'elseif' exp 'then' block
function parseIfStatement() {
  var clauses = [];
  var clauseMarker, test, block;

  // The leading IfClause starts where the enclosing IfStatement starts, so
  // the statement's marker (top of the location stack) is pushed a second
  // time for the clause to pop. Every clause is finished before the
  // following `elseif`, `else` or `end` token is consumed.
  if (trackLocations) {
    clauseMarker = locations[locations.length - 1];
    locations.push(clauseMarker);
  }
  test = parseExpectedExpression();
  expect('then');
  block = parseBlock();
  clauses.push(finishNode(ast.ifClause(test, block)));

  for (;;) {
    // The marker is created while a possible `elseif` is still the current
    // token, i.e. before consume() moves past it.
    if (trackLocations) clauseMarker = createLocationMarker();
    if (!consume('elseif')) break;

    pushLocation(clauseMarker);
    test = parseExpectedExpression();
    expect('then');
    block = parseBlock();
    clauses.push(finishNode(ast.elseifClause(test, block)));
  }

  if (consume('else')) {
    // `else` has just been consumed, so the marker is built from the
    // previous token to include the keyword in the ElseClause location.
    if (trackLocations) {
      clauseMarker = new Marker(previousToken);
      locations.push(clauseMarker);
    }
    block = parseBlock();
    clauses.push(finishNode(ast.elseClause(block)));
  }

  expect('end');
  return finishNode(ast.ifStatement(clauses));
}
// Parse either flavour of `for` loop; the `for` keyword has already been
// consumed by the statement dispatcher. Which flavour it is gets decided by
// the token following the first name: `=` selects the numeric form, anything
// else is treated as the generic form.
//
//     for ::= Name '=' exp ',' exp [',' exp] 'do' block 'end'
//     for ::= namelist 'in' explist 'do' block 'end'
//     namelist ::= Name {',' Name}
//     explist ::= exp {',' exp}
function parseForStatement() {
  var firstName = parseIdentifier();
  var loopBody;

  // The first loop variable is registered as a local in both forms.
  if (options.scope) scopeIdentifier(firstName);

  if (!consume('=')) {
    // Generic for: gather the rest of the namelist, registering every name
    // as a local.
    var names = [firstName];
    var extraName;
    while (consume(',')) {
      extraName = parseIdentifier();
      if (options.scope) scopeIdentifier(extraName);
      names.push(extraName);
    }
    expect('in');

    // The explist needs at least one expression.
    var iterators = [parseExpectedExpression()];
    while (consume(',')) {
      iterators.push(parseExpectedExpression());
    }

    expect('do');
    loopBody = parseBlock();
    expect('end');
    return finishNode(ast.forGenericStatement(names, iterators, loopBody));
  }

  // Numeric for: start and end are mandatory, the step is optional and is
  // passed to the node builder as null when absent.
  var from = parseExpectedExpression();
  expect(',');
  var to = parseExpectedExpression();
  var increment = null;
  if (consume(',')) increment = parseExpectedExpression();

  expect('do');
  loopBody = parseBlock();
  expect('end');
  return finishNode(ast.forNumericStatement(firstName, from, to, increment, loopBody));
}
// A `local` statement declares either variables or a function. Function
// declarations are handed over to `parseFunctionDeclaration()` with the
// isLocal flag set.
//
// This AST structure might change into a local assignment with a function
// child.
//
//     local ::= 'local' 'function' Name funcdecl
//        | 'local' Name {',' Name} ['=' exp {',' exp}]
function parseLocalStatement() {
  var functionName;

  if (Identifier !== token.type) {
    // Anything that is not a name has to be `function`.
    if (!consume('function')) {
      raiseUnexpectedToken('<name>', token);
      return;
    }
    functionName = parseIdentifier();
    // The function name is put in scope before its declaration is parsed.
    if (options.scope) scopeIdentifier(functionName);
    // MemberExpressions are not allowed in local function statements.
    return parseFunctionDeclaration(functionName, true);
  }

  var names = [];
  var values = [];

  names.push(parseIdentifier());
  while (consume(',')) {
    names.push(parseIdentifier());
  }

  if (consume('=')) {
    values.push(parseExpectedExpression());
    while (consume(',')) {
      values.push(parseExpectedExpression());
    }
  }

  // The declared names do not exist until the statement has been evaluated,
  // so the initializers cannot refer to them. They are therefore only added
  // to the scope once the whole statement has been parsed.
  if (options.scope) {
    for (var index = 0; index < names.length; index++) {
      scopeIdentifier(names[index]);
    }
  }

  return finishNode(ast.localStatement(names, values));
}
// Parse a statement that begins with a prefix expression: either an
// assignment or a function call used as a statement. Anything else is
// reported through unexpected().
//
//     assignment ::= varlist '=' explist
//     varlist ::= prefixexp {',' prefixexp}
//     explist ::= exp {',' exp}
//
//     call ::= callexp
//     callexp ::= prefixexp args | prefixexp ':' Name args
function parseAssignmentOrCallStatement() {
  // The token the statement started with, kept for the error raised when
  // the statement turns out to be invalid after it has been consumed.
  var firstToken = token;
  var startMarker, target;

  if (trackLocations) startMarker = createLocationMarker();

  target = parsePrefixExpression();
  if (null == target) return unexpected(token);

  // A `,` or `=` after the first prefix expression means assignment.
  var isAssignment = ',='.indexOf(token.value) !== -1;

  if (!isAssignment) {
    if (!isCallExpression(target)) {
      // The prefix expression was valid and has been consumed, but it is
      // neither an assignment target nor a call, so report the token the
      // statement began with to give a helpful message.
      return unexpected(firstToken);
    }
    pushLocation(startMarker);
    return finishNode(ast.callStatement(target));
  }

  var targets = [target];
  var values = [];
  var operand;

  while (consume(',')) {
    operand = parsePrefixExpression();
    if (null == operand) raiseUnexpectedToken('<expression>', token);
    targets.push(operand);
  }

  expect('=');

  // At least one value is required on the right-hand side.
  values.push(parseExpectedExpression());
  while (consume(',')) {
    values.push(parseExpectedExpression());
  }

  pushLocation(startMarker);
  return finishNode(ast.assignmentStatement(targets, values));
}
// ### Non-statements

// Consume the current token as an identifier and return its node. A token of
// any other type is reported through raiseUnexpectedToken().
//
//     Identifier ::= Name
function parseIdentifier() {
  markLocation();
  // Read the name before advancing; next() replaces the current token.
  var name = token.value;
  if (token.type !== Identifier) raiseUnexpectedToken('<name>', token);
  next();
  var node = ast.identifier(name);
  return finishNode(node);
}
// Parse a function's parameter list and body block. The name should already
// have been parsed and is passed in; keeping the name out of this function
// is what allows anonymous functions in expressions (they pass null).
//
// - `name` The already parsed name node, or null for anonymous functions.
// - `isLocal` Truthy for `local function` declarations; normalized to a
//   boolean before it is stored on the node.
//
// Returns the finished function node.
//
//     funcdecl ::= '(' [parlist] ')' block 'end'
//     parlist ::= Name {',' Name} [',' '...'] | '...'
function parseFunctionDeclaration(name, isLocal) {
  var parameters = [];
  expect('(');
  // The declaration has arguments
  if (!consume(')')) {
    // Arguments are a comma separated list of identifiers, optionally ending
    // with a vararg.
    while (true) {
      if (Identifier === token.type) {
        var parameter = parseIdentifier();
        // Function parameters are local.
        if (options.scope) scopeIdentifier(parameter);
        parameters.push(parameter);
        if (consume(',')) continue;
        // A name that is not followed by `,` must close the list. Looping
        // again here would silently accept a missing separator, as in
        // `function f(a b)` or `function f(a ...)`.
        expect(')');
        break;
      }
      // No arguments are allowed after a vararg.
      else if (VarargLiteral === token.type) {
        parameters.push(parsePrimaryExpression());
        expect(')');
        break;
      } else {
        raiseUnexpectedToken('<name> or \'...\'', token);
      }
    }
  }
  var body = parseBlock();
  expect('end');
  isLocal = isLocal || false;
  return finishNode(ast.functionStatement(name, parameters, isLocal, body));
}
// Parse the name in a function statement: a base identifier followed by any
// number of `.Name` members and at most one trailing `:Name` method part.
// Returns either the plain identifier or the outermost member expression.
//
//     Name {'.' Name} [':' Name]
function parseFunctionName() {
  var nameMarker, node;

  // Every member expression built below starts where the base name starts,
  // so a single marker is taken up front and re-pushed for each level.
  if (trackLocations) nameMarker = createLocationMarker();

  node = parseIdentifier();
  if (options.scope) attachScope(node, false);

  // Wrap the name parsed so far in a member expression using `indexer`.
  function appendMember(indexer) {
    pushLocation(nameMarker);
    var member = parseIdentifier();
    if (options.scope) attachScope(member, false);
    node = finishNode(ast.memberExpression(node, indexer, member));
  }

  while (consume('.')) appendMember('.');
  if (consume(':')) appendMember(':');

  return node;
}
// Parse the fields of a table constructor up to and including the closing
// `}`. The opening `{` has already been consumed by the caller. Returns the
// finished table constructor node.
//
//     tableconstructor ::= '{' [fieldlist] '}'
//     fieldlist ::= field {fieldsep field} [fieldsep]
//     field ::= '[' exp ']' '=' exp | Name '=' exp | exp
//
//     fieldsep ::= ',' | ';'
function parseTableConstructor() {
  var fields = []
    , key, value;
  while (true) {
    // Each field gets its own location marker, popped by finishNode().
    markLocation();
    if (Punctuator === token.type && consume('[')) {
      key = parseExpectedExpression();
      expect(']');
      expect('=');
      value = parseExpectedExpression();
      fields.push(finishNode(ast.tableKey(key, value)));
    } else if (Identifier === token.type) {
      // Only a bare name directly followed by `=` is a string key. Peeking
      // at the lookahead token keeps expressions such as `a.b = 1` or
      // `f() = 1` from being accepted as keys, and keeps the key from being
      // treated as a variable reference.
      if (Punctuator === lookahead.type && '=' === lookahead.value) {
        key = parseIdentifier();
        next(); // skip the `=`
        value = parseExpectedExpression();
        fields.push(finishNode(ast.tableKeyString(key, value)));
      } else {
        value = parseExpectedExpression();
        fields.push(finishNode(ast.tableValue(value)));
      }
    } else {
      if (null == (value = parseExpression())) {
        // No field here (empty table or trailing separator): discard the
        // marker taken for it at the top of the loop.
        locations.pop();
        break;
      }
      fields.push(finishNode(ast.tableValue(value)));
    }
    // Another field may only follow a separator. The token type is checked
    // so that string literals such as "" or "," are not taken for one.
    if (Punctuator === token.type &&
        (',' === token.value || ';' === token.value)) {
      next();
      continue;
    }
    // Anything else has to be the closing brace; expect() reports it if not.
    break;
  }
  expect('}');
  return finishNode(ast.tableConstructorExpression(fields));
}
// Expression parser
// -----------------
//
// Expressions are evaluated and always return a value. When the current
// token cannot start an expression, null is returned instead of a node.
//
//     exp ::= (unop exp | primary | prefixexp ) { binop exp }
//
//     primary ::= nil | false | true | Number | String | '...'
//          | functiondef | tableconstructor
//
//     prefixexp ::= (Name | '(' exp ')' ) { '[' exp ']'
//          | '.' Name | ':' Name args | args }
//

// Entry point of the expression parser: parse a full expression starting at
// the lowest precedence level.
function parseExpression() {
  return parseSubExpression(0);
}
// Parse an expression at a position where one is mandatory. The parsed node
// is returned; if there is none, the current token is reported through
// raiseUnexpectedToken().
function parseExpectedExpression() {
  var node = parseExpression();
  if (null != node) return node;
  raiseUnexpectedToken('<expression>', token);
}
// Return the precedence priority of a binary operator, or 0 when the given
// string is not a binary operator.
//
// As unary `-` can't be distinguished from binary `-`, unary precedence
// isn't described in this table but in `parseSubExpression()` itself.
//
// This gets hit on every expression, so the lookup works on the first
// character code and the operator length rather than on string comparisons
// (the expensive CompareICStub took ~8% of the parse time).
function binaryPrecedence(operator) {
  var first = operator.charCodeAt(0);

  switch (operator.length) {
    case 1:
      if (94 === first) return 10; // ^
      if (42 === first || 47 === first || 37 === first) return 7; // * / %
      if (43 === first || 45 === first) return 6; // + -
      if (60 === first || 62 === first) return 3; // < >
      return 0;
    case 2:
      if (46 === first) return 5; // ..
      if (60 === first || 62 === first || 61 === first || 126 === first) {
        return 3; // <= >= == ~=
      }
      if (111 === first) return 1; // or
      return 0;
    default:
      // Only `and` is left; the char code test is a cheap pre-filter.
      return (97 === first && 'and' === operator) ? 2 : 0;
  }
}
// Operator-precedence parser for unary and binary expressions.
//
// We use this algorithm because it's compact, it's fast and Lua core uses
// the same so we can be sure our expressions are parsed in the same manner
// without excessive amounts of tests.
//
// `minPrecedence` is the precedence an operator has to exceed for it to be
// folded into the expression at this level. Returns the expression node, or
// null when the current token cannot start an expression.
//
//     exp ::= (unop exp | primary | prefixexp ) { binop exp }
function parseSubExpression(minPrecedence) {
  var operator = token.value;
  var left, startMarker;

  // Taken before anything is consumed: every binary expression built in the
  // loop below starts at this position.
  if (trackLocations) startMarker = createLocationMarker();

  // UnaryExpression
  if (isUnary(token)) {
    markLocation();
    next();
    // Unary operators bind with precedence 8.
    var operand = parseSubExpression(8);
    if (operand == null) raiseUnexpectedToken('<expression>', token);
    left = finishNode(ast.unaryExpression(operator, operand));
  }

  // PrimaryExpression first, PrefixExpression as the fallback.
  if (null == left) left = parsePrimaryExpression();
  if (null == left) left = parsePrefixExpression();

  // This is not a valid left hand expression.
  if (null == left) return null;

  for (;;) {
    operator = token.value;

    // Only punctuators and keywords can be binary operators.
    var isOperatorCandidate = Punctuator === token.type || Keyword === token.type;
    var precedence = isOperatorCandidate ? binaryPrecedence(operator) : 0;
    if (precedence === 0 || precedence <= minPrecedence) break;

    // `^` and `..` are right associative: lowering the limit handed to the
    // recursive call lets an operator of the same kind bind to the right.
    if ('^' === operator || '..' === operator) precedence--;

    next();
    var right = parseSubExpression(precedence);
    if (null == right) raiseUnexpectedToken('<expression>', token);

    // Push in the marker created before the loop to wrap its entirety.
    if (trackLocations) locations.push(startMarker);
    left = finishNode(ast.binaryExpression(operator, left, right));
  }

  return left;
}
// Parse a prefix expression: a name or a parenthesized expression followed
// by any number of index, member and call suffixes. Returns the outermost
// node, or null when the current token cannot start a prefix expression.
//
//     prefixexp ::= prefix {suffix}
//     prefix ::= Name | '(' exp ')'
//     suffix ::= '[' exp ']' | '.' Name | ':' Name args | args
//
//     args ::= '(' [explist] ')' | tableconstructor | String
function parsePrefixExpression() {
  var base, name, marker
    // Keep track of the scope, if a parent is local so are the children.
    , isLocal;
  // Every suffix wraps everything parsed so far, so each wrapper node starts
  // at this same marker.
  if (trackLocations) marker = createLocationMarker();
  // The prefix
  if (Identifier === token.type) {
    name = token.value;
    base = parseIdentifier();
    // Set the parent scope.
    if (options.scope) attachScope(base, isLocal = scopeHasName(name));
  } else if (consume('(')) {
    base = parseExpectedExpression();
    expect(')');
    if (options.scope) isLocal = base.isLocal;
  } else {
    return null;
  }
  // The suffix
  var expression, identifier;
  while (true) {
    if (Punctuator === token.type) {
      switch (token.value) {
        case '[':
          pushLocation(marker);
          next();
          expression = parseExpectedExpression();
          // Consume the closing bracket before finishing the node so that
          // the recorded location covers it (finishNode() is assumed to end
          // a node at the most recently consumed token).
          expect(']');
          base = finishNode(ast.indexExpression(base, expression));
          break;
        case '.':
          pushLocation(marker);
          next();
          identifier = parseIdentifier();
          // Inherit the scope
          if (options.scope) attachScope(identifier, isLocal);
          base = finishNode(ast.memberExpression(base, '.', identifier));
          break;
        case ':':
          pushLocation(marker);
          next();
          identifier = parseIdentifier();
          if (options.scope) attachScope(identifier, isLocal);
          base = finishNode(ast.memberExpression(base, ':', identifier));
          // Once a : is found, this has to be a CallExpression, otherwise
          // throw an error.
          pushLocation(marker);
          base = parseCallExpression(base);
          break;
        case '(': case '{': // args
          pushLocation(marker);
          base = parseCallExpression(base);
          break;
        default:
          // Any other punctuator ends the suffix chain.
          return base;
      }
    } else if (StringLiteral === token.type) {
      // A string directly after a prefix expression is a call argument.
      pushLocation(marker);
      base = parseCallExpression(base);
    } else {
      break;
    }
  }
  return base;
}
// Parse the arguments of a call to `base` and return the matching call node.
// Three argument forms exist: a parenthesized expression list, a table
// constructor and a single string literal. Any other token is reported
// through raiseUnexpectedToken().
//
//     args ::= '(' [explist] ')' | tableconstructor | String
function parseCallExpression(base) {
  if (StringLiteral === token.type) {
    var stringArgument = parsePrimaryExpression();
    return finishNode(ast.stringCallExpression(base, stringArgument));
  }

  if (Punctuator === token.type) {
    if ('(' === token.value) {
      next();
      // The first argument is optional; after a comma one is required.
      var callArguments = [];
      var argument = parseExpression();
      if (null != argument) callArguments.push(argument);
      while (consume(',')) {
        argument = parseExpectedExpression();
        callArguments.push(argument);
      }
      expect(')');
      return finishNode(ast.callExpression(base, callArguments));
    }

    if ('{' === token.value) {
      // Mark the location for the table constructor node, then step past
      // `{` since parseTableConstructor() starts at the first field.
      markLocation();
      next();
      var tableArgument = parseTableConstructor();
      return finishNode(ast.tableCallExpression(base, tableArgument));
    }
  }

  raiseUnexpectedToken('function arguments', token);
}
// Parse a primary expression: a literal, an anonymous function or a table
// constructor. Nothing is returned (undefined) when the current token starts
// none of these.
//
//     primary ::= String | Numeric | nil | true | false
//          | functiondef | tableconstructor | '...'
function parsePrimaryExpression() {
  // Token types are bit flags, so one mask covers every literal type.
  var LITERAL_MASK = StringLiteral | NumericLiteral | BooleanLiteral | NilLiteral | VarargLiteral;
  var tokenType = token.type;
  var tokenValue = token.value;
  var startMarker;

  if (trackLocations) startMarker = createLocationMarker();

  if ((tokenType & LITERAL_MASK) !== 0) {
    pushLocation(startMarker);
    // Keep the literal exactly as written in the source next to its value.
    var raw = input.slice(token.range[0], token.range[1]);
    next();
    return finishNode(ast.literal(tokenType, tokenValue, raw));
  }

  if (Keyword === tokenType && 'function' === tokenValue) {
    pushLocation(startMarker);
    next();
    // Anonymous function: there is no name to pass along.
    return parseFunctionDeclaration(null);
  }

  if (consume('{')) {
    pushLocation(startMarker);
    return parseTableConstructor();
  }
}
// Parser
// ------

// Export the main parser.
//
// `parse(input, options)` resets the parser state and, unless the `wait`
// option is set, parses immediately and returns the resulting chunk. With
// `wait` the parser object itself is returned so that `write()` and `end()`
// can be chained. An options object may also be passed as the only argument.
// Options are merged with `defaultOptions`:
//
//   - `wait` Hold parsing until end() is called. Defaults to false
//   - `comments` Store comments. Defaults to true.
//   - `scope` Track identifier scope. Defaults to false.
//   - `locations` Store location information. Defaults to false.
//   - `ranges` Store the start and end character locations. Defaults to
//     false.
//
// Example:
//
//     var parser = require('luaparse');
//     parser.parse('i = 0');
exports.parse = parse;
function parse(_input, _options) {
  var source = _input;
  var overrides = _options;

  // Support the parse(options) call form.
  if (overrides === undefined && typeof source === 'object') {
    overrides = source;
    source = undefined;
  }

  input = source || '';
  options = extend(defaultOptions, overrides || {});

  // Rewind the lexer
  index = 0;
  line = 1;
  lineStart = 0;
  length = input.length;

  // When tracking identifier scope, initialize with an empty scope.
  scopes = [[]];
  scopeDepth = 0;
  globals = [];
  locations = [];

  if (options.comments) comments = [];

  return options.wait ? exports : end();
}
// Append to the source code buffer without starting the parse. The argument
// is converted to a string first. Returns the parser object so calls can be
// chained.
exports.write = write;
function write(_input) {
  input = input + String(_input);
  length = input.length;
  return exports;
}
// Send an EOF and begin parsing. An optional final piece of input is
// appended to the buffer first. Returns the parsed chunk, with `comments`
// and `globals` attached when the corresponding options are enabled.
exports.end = end;
function end(_input) {
  if (_input !== undefined) write(_input);

  length = input.length;
  // Markers are only needed when locations or ranges were requested.
  trackLocations = options.locations || options.ranges;

  // Initialize with a lookahead token.
  lookahead = lex();

  var tree = parseChunk();
  if (options.comments) tree.comments = comments;
  if (options.scope) tree.globals = globals;

  // Every marker pushed during the parse must have been popped again by now.
  if (locations.length !== 0) {
    throw new Error('Location tracking failed. This is most likely a bug in luaparse');
  }

  return tree;
}
}));
/* vim: set sw=2 ts=2 et tw=79 : */
});