Merge pull request #4854 from overleaf/hb-decaf-log-parsers

Decaf log parsers

GitOrigin-RevId: 71c44b8ac86f7fd980b0547a4c5d49651a31d977
This commit is contained in:
Hugh O'Brien 2021-08-24 14:57:53 +01:00 committed by Copybot
parent 006a140fb8
commit 19bb96bb20
10 changed files with 578 additions and 978 deletions

View file

@ -1,3 +0,0 @@
#!/usr/bin/env sh
# Compile the CoffeeScript sources to bare (unwrapped) JavaScript in dist/.
# The interpreter line must name a path to env; a bare "env" is looked up
# relative to the current directory and fails when the script is executed.
# Stop at the first failing compile so a partial build is never left behind.
set -e
./node_modules/.bin/coffee --bare -o dist/ src/coffee/latex-log-parser.coffee
./node_modules/.bin/coffee --bare -o dist/ src/coffee/bib-log-parser.coffee

View file

@ -1,190 +0,0 @@
// Generated by CoffeeScript 1.12.7
define(function() {
// Every module-level local, declared once up front (CoffeeScript compiler output).
var BAD_CROSS_REFERENCE_REGEX, BibLogParser, LINE_SPLITTER_REGEX, MESSAGE_LEVELS, MULTILINE_COMMAND_ERROR_REGEX, MULTILINE_ERROR_REGEX, MULTILINE_WARNING_REGEX, SINGLELINE_WARNING_REGEX, consume, errorParsers, warningParsers;
// Matches one Biber log line. Captures: [1] the digits inside the leading
// square brackets, [2] the level keyword, [3] the message after " - ".
LINE_SPLITTER_REGEX = /^\[(\d+)].*>\s(INFO|WARN|ERROR)\s-\s(.*)$/;
// Biber level keyword -> level name stored on parsed entries.
MESSAGE_LEVELS = {
"INFO": "info",
"WARN": "warning",
"ERROR": "error"
};
// Constructor: holds a bibtex/biber log ready for parsing.
//   text    - the complete log as a string; anything else throws an Error.
//   options - optional settings object, stored as-is (defaults to {}).
// Sets this.text (newlines normalised to "\n"), this.options and this.lines.
BibLogParser = function(text, options) {
  if (typeof text !== 'string') {
    throw new Error("BibLogParser Error: text parameter must be a string");
  }
  this.text = text.replace(/(\r\n)|\r/g, '\n');
  this.options = options || {};
  // Split the normalised text, not the raw argument: otherwise CRLF logs keep
  // a trailing "\r" on every line and bare-CR logs are never split at all.
  this.lines = this.text.split('\n');
};
// Repeatedly applies `regex` to `logText`, turning each match into an entry
// with `process(match)` and cutting the matched text (plus the one character
// that follows it) out of the text before searching again.
//   logText - text to search
//   regex   - RegExp applied with exec()
//   process - function(match) -> entry
// Returns [entries, remainingText].
consume = function(logText, regex, process) {
  var iterationCount, match, newEntry, re, result, text;
  text = logText;
  result = [];
  re = regex;
  iterationCount = 0;
  while (match = re.exec(text)) {
    iterationCount += 1;
    if (iterationCount >= 10000) {
      // Safety cap. Keep the same [entries, text] shape as the normal exit:
      // callers read index 0 and 1, so returning the bare array handed them
      // two entries in place of the entry list and the remaining text.
      return [result, text];
    }
    newEntry = process(match);
    result.push(newEntry);
    text = (match.input.slice(0, match.index)) + (match.input.slice(match.index + match[0].length + 1, match.input.length));
  }
  return [result, text];
};
// "Warning--<message>" followed on the next line by "--line <n> of file <name>".
// Captures: [1] message, [2] line number, [3] file name.
MULTILINE_WARNING_REGEX = /^Warning--(.+)\n--line (\d+) of file (.+)$/m;
// A lone "Warning--<message>" line. Captures: [1] message.
SINGLELINE_WARNING_REGEX = /^Warning--(.+)$/m;
// "<message>---line <n> of file <name>", then any text (newlines included,
// matched lazily) up to a line reading "I'm skipping whatever remains of this entry".
// Captures: [1] message, [2] line number, [3] file name, [4] the text in between.
MULTILINE_ERROR_REGEX = /^(.*)---line (\d+) of file (.*)\n([^]+?)\nI'm skipping whatever remains of this entry$/m;
// The two-line "A bad cross reference---entry ... refers to entry ..., which
// doesn't exist" message. Captures: [1] the whole message.
BAD_CROSS_REFERENCE_REGEX = /^(A bad cross reference---entry ".+?"\nrefers to entry.+?, which doesn't exist)$/m;
// Same layout as MULTILINE_ERROR_REGEX, except that "---line" may start on the
// line after the message and the closing line ends with "of this command".
MULTILINE_COMMAND_ERROR_REGEX = /^(.*)\n?---line (\d+) of file (.*)\n([^]+?)\nI'm skipping whatever remains of this command$/m;
// BibTeX warning parsers, applied in this order. Each item is a pair
// [regex, toEntry]; toEntry converts a regex match into a log entry object.
warningParsers = [
  [
    // Warning that carries a line number and file name.
    MULTILINE_WARNING_REGEX,
    function(found) {
      return {
        file: found[3],
        level: "warning",
        message: found[1],
        line: found[2],
        raw: found[0]
      };
    }
  ],
  [
    // Warning without any location information.
    SINGLELINE_WARNING_REGEX,
    function(found) {
      return {
        file: '',
        level: "warning",
        message: found[1],
        line: '',
        raw: found[0]
      };
    }
  ]
];
// BibTeX error parsers, applied in this order. Each item is a pair
// [regex, toEntry]; toEntry converts a regex match into a log entry object.
errorParsers = [
  [
    // Error inside a database entry: message, location, then detail text.
    MULTILINE_ERROR_REGEX,
    function(found) {
      return {
        file: found[3],
        level: "error",
        message: found[1] + '\n' + found[4],
        line: found[2],
        raw: found[0]
      };
    }
  ],
  [
    // Bad cross reference: no location available.
    BAD_CROSS_REFERENCE_REGEX,
    function(found) {
      return {
        file: '',
        level: "error",
        message: found[1],
        line: '',
        raw: found[0]
      };
    }
  ],
  [
    // Error inside a command: same capture layout as the entry error.
    MULTILINE_COMMAND_ERROR_REGEX,
    function(found) {
      return {
        file: found[3],
        level: "error",
        message: found[1] + '\n' + found[4],
        line: found[2],
        raw: found[0]
      };
    }
  ]
];
// Instance methods, installed on BibLogParser.prototype.
(function() {
  // Parses BibTeX output. The warning parsers run first over this.text; the
  // error parsers then run over whatever text the warning parsers left behind.
  // Returns { all, errors, warnings, files, typesetting }.
  this.parseBibtex = function() {
    var warnings = [];
    var errors = [];
    var remaining = this.text;
    var i, outcome;
    for (i = 0; i < warningParsers.length; i++) {
      outcome = consume(remaining, warningParsers[i][0], warningParsers[i][1]);
      warnings = warnings.concat(outcome[0]);
      remaining = outcome[1];
    }
    for (i = 0; i < errorParsers.length; i++) {
      outcome = consume(remaining, errorParsers[i][0], errorParsers[i][1]);
      errors = errors.concat(outcome[0]);
      remaining = outcome[1];
    }
    return {
      all: warnings.concat(errors),
      errors: errors,
      warnings: warnings,
      files: [],
      typesetting: []
    };
  };

  // Parses Biber output one line at a time; lines that do not match
  // LINE_SPLITTER_REGEX are ignored.
  // Returns { all, errors, warnings, files, typesetting }.
  this.parseBiber = function() {
    var report = {
      all: [],
      errors: [],
      warnings: [],
      files: [],
      typesetting: []
    };
    var i, parts, entry, located;
    for (i = 0; i < this.lines.length; i++) {
      parts = this.lines[i].match(LINE_SPLITTER_REGEX);
      if (!parts) {
        continue;
      }
      entry = {
        file: '',
        level: MESSAGE_LEVELS[parts[2]] || "INFO",
        message: parts[3],
        line: '',
        raw: parts[0]
      };
      // Messages relayed from the BibTeX subsystem embed the file name, the
      // line number and the actual message; pull those out when present.
      located = entry.message.match(/^BibTeX subsystem: \/.+\/(\w+\.\w+)_.+, line (\d+), (.+)$/);
      if (located && located.length === 4) {
        entry.file = located[1];
        entry.line = located[2];
        entry.message = located[3];
      }
      report.all.push(entry);
      if (entry.level === 'error') {
        report.errors.push(entry);
      } else if (entry.level === 'warning') {
        report.warnings.push(entry);
      }
    }
    return report;
  };

  // Looks at the first log line to decide between Biber and BibTeX parsing.
  // Throws an Error when the first line matches neither format.
  return this.parse = function() {
    var header = this.lines[0];
    if (header.match(/^.*INFO - This is Biber.*$/)) {
      return this.parseBiber();
    }
    if (header.match(/^This is BibTeX, Version.+$/)) {
      return this.parseBibtex();
    }
    throw new Error("BibLogParser Error: cannot determine whether text is biber or bibtex output");
  };
}).call(BibLogParser.prototype);
// Convenience entry point: builds a parser for `text` and returns its parse() result.
BibLogParser.parse = function(text, options) {
  var parser = new BibLogParser(text, options);
  return parser.parse();
};
return BibLogParser;
});

View file

@ -1,298 +0,0 @@
// Generated by CoffeeScript 1.12.7
define(function() {
// Every module-level local, declared once up front (CoffeeScript compiler output).
var HBOX_WARNING_REGEX, LATEX_WARNING_REGEX, LINES_REGEX, LOG_WRAP_LIMIT, LatexParser, LogText, PACKAGE_REGEX, PACKAGE_WARNING_REGEX, state;
// A physical log line of exactly this length is treated as wrapped (see LogText).
LOG_WRAP_LIMIT = 79;
// "LaTeX Warning: <text>". Captures: [1] the text.
LATEX_WARNING_REGEX = /^LaTeX Warning: (.*)$/;
// Start of an Overfull/Underfull \hbox or \vbox report.
HBOX_WARNING_REGEX = /^(Over|Under)full \\(v|h)box/;
// "Package <name> Warning:<text>". Captures: [1] the whole line.
PACKAGE_WARNING_REGEX = /^(Package \b.+\b Warning:.*)$/;
// "line <n>" or "lines <n>". Captures: [1] the number.
LINES_REGEX = /lines? ([0-9]+)/;
// "Package <name> Warning". Captures: [1] the package name.
PACKAGE_REGEX = /^Package (\b.+\b) Warning/;
// Constructor: a cursor over the logical lines of a LaTeX log.
// Newlines are normalised to "\n", then physical lines that look wrapped are
// joined back together. Sets this.text, this.lines and this.row (0).
LogText = function(text) {
  var physical, joined, idx, previous;
  this.text = text.replace(/(\r\n)|\r/g, '\n');
  physical = this.text.split('\n');
  joined = [physical[0]];
  for (idx = 1; idx < physical.length; idx++) {
    previous = physical[idx - 1];
    // A preceding line of exactly LOG_WRAP_LIMIT characters is taken to have
    // wrapped onto this one, unless it ends in "..." (not joined in that case).
    if (previous.length !== LOG_WRAP_LIMIT || previous.slice(-3) === '...') {
      joined.push(physical[idx]);
    } else {
      joined[joined.length - 1] += physical[idx];
    }
  }
  this.lines = joined;
  this.row = 0;
};
// Cursor methods, installed on LogText.prototype.
(function() {
  // Advances the cursor and returns the line there, or false past the end.
  this.nextLine = function() {
    this.row += 1;
    return this.row < this.lines.length ? this.lines[this.row] : false;
  };

  // Moves the cursor back by one line.
  this.rewindLine = function() {
    this.row -= 1;
  };

  // Reads lines up to and including the next empty or spaces-only line.
  this.linesUpToNextWhitespaceLine = function() {
    return this.linesUpToNextMatchingLine(/^ *$/);
  };

  // Reads lines up to and including the first one matching `match`, or to the
  // end of the log when none matches. Returns the lines read.
  this.linesUpToNextMatchingLine = function(match) {
    var collected = [];
    var current;
    do {
      current = this.nextLine();
      if (current === false) {
        break;
      }
      collected.push(current);
    } while (!current.match(match));
    return collected;
  };
}).call(LogText.prototype);
// Parser modes: NORMAL scans for the start of a message; ERROR means an error
// line was just seen and its context still has to be collected.
state = { NORMAL: 0, ERROR: 1 };

// Constructor: prepares a parser for one LaTeX log.
//   text    - the log text, wrapped in a LogText
//   options - optional { fileBaseNames, ignoreDuplicates }
LatexParser = function(text, options) {
  var settings = options || {};
  this.log = new LogText(text);
  this.state = state.NORMAL;
  this.fileBaseNames = settings.fileBaseNames || [/compiles/, /\/usr\/local/];
  this.ignoreDuplicates = settings.ignoreDuplicates;
  this.data = [];
  this.fileStack = [];
  // Both names start out pointing at the same (root) list.
  this.rootFileList = [];
  this.currentFileList = this.rootFileList;
  this.openParens = 0;
};
// Instance methods, installed on LatexParser.prototype.
(function() {
  // Escapes every regex metacharacter in `text` so that it is matched
  // literally when embedded in a dynamically built RegExp.
  var escapeForRegExp = function(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  };

  // Walks the log line by line, dispatching on what each line looks like, and
  // returns the result of postProcess() over everything collected.
  this.parse = function() {
    var lineNo;
    while ((this.currentLine = this.log.nextLine()) !== false) {
      if (this.state === state.NORMAL) {
        if (this.currentLineIsError()) {
          this.state = state.ERROR;
          this.currentError = {
            line: null,
            file: this.currentFilePath,
            level: 'error',
            // Drop the first two characters (the "!" marker and the one after it).
            message: this.currentLine.slice(2),
            content: '',
            raw: this.currentLine + '\n'
          };
        } else if (this.currentLineIsRunawayArgument()) {
          this.parseRunawayArgumentError();
        } else if (this.currentLineIsWarning()) {
          this.parseSingleWarningLine(LATEX_WARNING_REGEX);
        } else if (this.currentLineIsHboxWarning()) {
          this.parseHboxLine();
        } else if (this.currentLineIsPackageWarning()) {
          this.parseMultipleWarningLine();
        } else {
          this.parseParensForFilenames();
        }
      }
      if (this.state === state.ERROR) {
        // Context of the error: everything up to the "l.<n>" line, followed
        // by the next two runs of lines ending at a blank line.
        this.currentError.content += this.log.linesUpToNextMatchingLine(/^l\.[0-9]+/).join('\n');
        this.currentError.content += '\n';
        this.currentError.content += this.log.linesUpToNextWhitespaceLine().join('\n');
        this.currentError.content += '\n';
        this.currentError.content += this.log.linesUpToNextWhitespaceLine().join('\n');
        this.currentError.raw += this.currentError.content;
        lineNo = this.currentError.raw.match(/l\.([0-9]+)/);
        if (lineNo) {
          this.currentError.line = parseInt(lineNo[1], 10);
        }
        this.data.push(this.currentError);
        this.state = state.NORMAL;
      }
    }
    return this.postProcess(this.data);
  };

  // True when the current line starts with "!".
  this.currentLineIsError = function() {
    return this.currentLine[0] === '!';
  };

  // Match result (truthy) when the current line starts with "Runaway argument".
  this.currentLineIsRunawayArgument = function() {
    return this.currentLine.match(/^Runaway argument/);
  };

  this.currentLineIsWarning = function() {
    return !!this.currentLine.match(LATEX_WARNING_REGEX);
  };

  this.currentLineIsPackageWarning = function() {
    return !!this.currentLine.match(PACKAGE_WARNING_REGEX);
  };

  this.currentLineIsHboxWarning = function() {
    return !!this.currentLine.match(HBOX_WARNING_REGEX);
  };

  // Records a "Runaway argument" error together with the next two runs of
  // lines ending at a blank line.
  this.parseRunawayArgumentError = function() {
    var lineNo;
    this.currentError = {
      line: null,
      file: this.currentFilePath,
      level: 'error',
      message: this.currentLine,
      content: '',
      raw: this.currentLine + '\n'
    };
    this.currentError.content += this.log.linesUpToNextWhitespaceLine().join('\n');
    this.currentError.content += '\n';
    this.currentError.content += this.log.linesUpToNextWhitespaceLine().join('\n');
    this.currentError.raw += this.currentError.content;
    lineNo = this.currentError.raw.match(/l\.([0-9]+)/);
    if (lineNo) {
      this.currentError.line = parseInt(lineNo[1], 10);
    }
    return this.data.push(this.currentError);
  };

  // Records a one-line warning; `prefix_regex` must capture the warning text
  // in group 1. Does nothing when the current line does not match.
  this.parseSingleWarningLine = function(prefix_regex) {
    var line, lineMatch, warning, warningMatch;
    warningMatch = this.currentLine.match(prefix_regex);
    if (!warningMatch) {
      return;
    }
    warning = warningMatch[1];
    lineMatch = warning.match(LINES_REGEX);
    line = lineMatch ? parseInt(lineMatch[1], 10) : null;
    this.data.push({
      line: line,
      file: this.currentFilePath,
      level: 'warning',
      message: warning,
      raw: warning
    });
  };

  // Records a package warning that may continue over several lines; the
  // continuation ends at the first empty line (or the end of the log).
  this.parseMultipleWarningLine = function() {
    var line, lineMatch, packageMatch, packageName, prefixRegex, raw_message, warningMatch, warning_lines;
    warningMatch = this.currentLine.match(PACKAGE_WARNING_REGEX);
    if (!warningMatch) {
      return;
    }
    warning_lines = [warningMatch[1]];
    lineMatch = this.currentLine.match(LINES_REGEX);
    line = lineMatch ? parseInt(lineMatch[1], 10) : null;
    packageMatch = this.currentLine.match(PACKAGE_REGEX);
    packageName = packageMatch[1];
    // Strips the "(packagename)" prefix from continuation lines. The name is
    // escaped first: it comes straight from the log, and an unescaped
    // metacharacter (e.g. an unbalanced parenthesis) would make RegExp throw.
    prefixRegex = new RegExp('(?:\\(' + escapeForRegExp(packageName) + '\\))*[\\s]*(.*)', 'i');
    while (!!(this.currentLine = this.log.nextLine())) {
      lineMatch = this.currentLine.match(LINES_REGEX);
      line = lineMatch ? parseInt(lineMatch[1], 10) : line;
      warningMatch = this.currentLine.match(prefixRegex);
      warning_lines.push(warningMatch[1]);
    }
    raw_message = warning_lines.join(' ');
    this.data.push({
      line: line,
      file: this.currentFilePath,
      level: 'warning',
      message: raw_message,
      raw: raw_message
    });
  };

  // Records an Overfull/Underfull box report as a 'typesetting' entry.
  this.parseHboxLine = function() {
    var line, lineMatch;
    lineMatch = this.currentLine.match(LINES_REGEX);
    line = lineMatch ? parseInt(lineMatch[1], 10) : null;
    this.data.push({
      line: line,
      file: this.currentFilePath,
      level: 'typesetting',
      message: this.currentLine,
      raw: this.currentLine
    });
  };

  // Tracks which file the log is currently inside: "(" followed by a path
  // opens a file, ")" closes one. Handles one parenthesis per call and
  // recurses on the rest of the line.
  this.parseParensForFilenames = function() {
    var filePath, newFile, pos, previousFile, token;
    pos = this.currentLine.search(/\(|\)/);
    if (pos !== -1) {
      token = this.currentLine[pos];
      this.currentLine = this.currentLine.slice(pos + 1);
      if (token === '(') {
        filePath = this.consumeFilePath();
        if (filePath) {
          this.currentFilePath = filePath;
          newFile = {
            path: filePath,
            files: []
          };
          this.fileStack.push(newFile);
          this.currentFileList.push(newFile);
          this.currentFileList = newFile.files;
        } else {
          // A "(" that does not introduce a file: remember it so that its
          // matching ")" does not close a file.
          this.openParens++;
        }
      } else if (token === ')') {
        if (this.openParens > 0) {
          this.openParens--;
        } else {
          if (this.fileStack.length > 1) {
            this.fileStack.pop();
            previousFile = this.fileStack[this.fileStack.length - 1];
            this.currentFilePath = previousFile.path;
            this.currentFileList = previousFile.files;
          }
        }
      }
      this.parseParensForFilenames();
    }
  };

  // Removes and returns a file path from the start of the current line, or
  // returns false when the line does not start with something path-like
  // (a run without spaces or ")" that contains at least one "/").
  this.consumeFilePath = function() {
    var endOfFilePath, path;
    if (!this.currentLine.match(/^\/?([^ \)]+\/)+/)) {
      return false;
    }
    endOfFilePath = this.currentLine.search(RegExp(' |\\)'));
    path = void 0;
    if (endOfFilePath === -1) {
      path = this.currentLine;
      this.currentLine = '';
    } else {
      path = this.currentLine.slice(0, endOfFilePath);
      this.currentLine = this.currentLine.slice(endOfFilePath);
    }
    return path;
  };

  // Sorts collected entries by level and, when this.ignoreDuplicates is set,
  // drops entries whose `raw` text has been seen before.
  // Returns { errors, warnings, typesetting, all, files }.
  return this.postProcess = function(data) {
    var all, errors, hashEntry, hashes, i, typesetting, warnings;
    all = [];
    errors = [];
    warnings = [];
    typesetting = [];
    hashes = [];
    hashEntry = function(entry) {
      return entry.raw;
    };
    i = 0;
    while (i < data.length) {
      if (this.ignoreDuplicates && hashes.indexOf(hashEntry(data[i])) > -1) {
        i++;
        continue;
      }
      if (data[i].level === 'error') {
        errors.push(data[i]);
      } else if (data[i].level === 'typesetting') {
        typesetting.push(data[i]);
      } else if (data[i].level === 'warning') {
        warnings.push(data[i]);
      }
      all.push(data[i]);
      hashes.push(hashEntry(data[i]));
      i++;
    }
    return {
      errors: errors,
      warnings: warnings,
      typesetting: typesetting,
      all: all,
      files: this.rootFileList
    };
  };
}).call(LatexParser.prototype);
// Convenience entry point: builds a parser for `text` and returns its parse() result.
LatexParser.parse = function(text, options) {
  var parser = new LatexParser(text, options);
  return parser.parse();
};
return LatexParser;
});

View file

@ -1,6 +1,6 @@
{
"name": "latex-log-parser-sharelatex",
"version": "1.0.0",
"name": "@overleaf/latex-log-parser",
"version": "2.0.0",
"description": "",
"scripts": {
"lint": "echo noop",
@ -12,6 +12,5 @@
"author": "",
"license": "MIT",
"dependencies": {
"coffee-script": "^1.10.0"
}
}

View file

@ -1,188 +0,0 @@
define ->
# Matches one Biber log line. The match array is laid out as
# [fullLine, lineNumber, messageType, message]
# where lineNumber is the run of digits inside the leading square brackets.
LINE_SPLITTER_REGEX = /^\[(\d+)].*>\s(INFO|WARN|ERROR)\s-\s(.*)$/
# Biber level keyword -> level name stored on parsed entries.
MESSAGE_LEVELS = {
"INFO": "info"
"WARN": "warning"
"ERROR": "error"
}
# Constructor: holds a bibtex/biber log ready for parsing.
#   text    - the complete log as a string; anything else throws an Error.
#   options - optional settings object, stored as-is (defaults to {}).
# Sets @text (newlines normalised to "\n"), @options and @lines.
BibLogParser = (text, options) ->
  if typeof text != 'string'
    throw new Error("BibLogParser Error: text parameter must be a string")
  @text = text.replace(/(\r\n)|\r/g, '\n')
  @options = options || {}
  # Split the normalised text, not the raw argument: otherwise CRLF logs keep
  # a trailing "\r" on every line and bare-CR logs are never split at all.
  @lines = @text.split('\n')
  return
# Repeatedly applies `regex` to `logText`, turning each match into an entry
# with `process(match)` and cutting the matched text (plus the one character
# that follows it) out of the text before searching again.
# Returns [entries, remainingText].
consume = (logText, regex, process) ->
  text = logText
  result = []
  re = regex
  iterationCount = 0
  while match = re.exec(text)
    iterationCount += 1
    if iterationCount >= 10000
      # Safety cap. Keep the same [entries, text] shape as the normal exit:
      # callers destructure the return value, so returning the bare array
      # handed them two entries in place of the entry list and the text.
      return [result, text]
    newEntry = process(match)
    result.push newEntry
    text = (
      (match.input.slice(0, match.index)) +
      (match.input.slice(match.index + match[0].length + 1, match.input.length))
    )
  return [result, text]
# "Warning--<message>" followed on the next line by "--line <n> of file <name>".
# Captures: [1] message, [2] line number, [3] file name.
MULTILINE_WARNING_REGEX = /^Warning--(.+)\n--line (\d+) of file (.+)$/m
# A lone "Warning--<message>" line. Captures: [1] message.
SINGLELINE_WARNING_REGEX = /^Warning--(.+)$/m
# "<message>---line <n> of file <name>", then any text (newlines included,
# matched lazily) up to a line reading "I'm skipping whatever remains of this entry".
# Captures: [1] message, [2] line number, [3] file name, [4] the text in between.
MULTILINE_ERROR_REGEX = /^(.*)---line (\d+) of file (.*)\n([^]+?)\nI'm skipping whatever remains of this entry$/m
# The two-line "A bad cross reference---entry ... refers to entry ..., which
# doesn't exist" message. Captures: [1] the whole message.
BAD_CROSS_REFERENCE_REGEX = /^(A bad cross reference---entry ".+?"\nrefers to entry.+?, which doesn't exist)$/m
# Same layout as MULTILINE_ERROR_REGEX, except that "---line" may start on the
# line after the message and the closing line ends with "of this command".
MULTILINE_COMMAND_ERROR_REGEX = /^(.*)\n?---line (\d+) of file (.*)\n([^]+?)\nI'm skipping whatever remains of this command$/m
# Each parser is a pair of [regex, processFunction], where processFunction
# describes how to transform the regex match into a log entry object.
# Warning parsers are applied in the order listed.
warningParsers = [
  [
    # Warning that carries a line number and file name.
    MULTILINE_WARNING_REGEX,
    (found) ->
      return {
        file: found[3],
        level: "warning",
        message: found[1],
        line: found[2],
        raw: found[0]
      }
  ],
  [
    # Warning without any location information.
    SINGLELINE_WARNING_REGEX,
    (found) ->
      return {
        file: '',
        level: "warning",
        message: found[1],
        line: '',
        raw: found[0]
      }
  ]
]
# BibTeX error parsers, applied in the order listed. Each item is a pair
# [regex, processFunction] like the warning parsers.
errorParsers = [
  [
    # Error inside a database entry: message, location, then detail text.
    MULTILINE_ERROR_REGEX,
    (found) ->
      return {
        file: found[3],
        level: "error",
        message: found[1] + '\n' + found[4],
        line: found[2],
        raw: found[0]
      }
  ],
  [
    # Bad cross reference: no location available.
    BAD_CROSS_REFERENCE_REGEX,
    (found) ->
      return {
        file: '',
        level: "error",
        message: found[1],
        line: '',
        raw: found[0]
      }
  ],
  [
    # Error inside a command: same capture layout as the entry error.
    MULTILINE_COMMAND_ERROR_REGEX,
    (found) ->
      return {
        file: found[3],
        level: "error",
        message: found[1] + '\n' + found[4],
        line: found[2],
        raw: found[0]
      }
  ]
]
# Instance methods, installed on BibLogParser.prototype.
(->
  # Parses BibTeX output. The warning parsers run first over @text; the error
  # parsers then run over whatever text the warning parsers left behind.
  @parseBibtex = () ->
    allWarnings = []
    remainingText = @text
    for [regex, process] in warningParsers
      [found, remainingText] = consume remainingText, regex, process
      allWarnings = allWarnings.concat(found)
    allErrors = []
    for [regex, process] in errorParsers
      [found, remainingText] = consume remainingText, regex, process
      allErrors = allErrors.concat(found)
    return {
      all: allWarnings.concat(allErrors),
      errors: allErrors,
      warnings: allWarnings,
      files: [], # not used
      typesetting: [] # not used
    }

  # Parses Biber output one line at a time; lines that do not match
  # LINE_SPLITTER_REGEX are ignored.
  @parseBiber = () ->
    result = {
      all: [],
      errors: [],
      warnings: [],
      files: [], # not used
      typesetting: [] # not used
    }
    for line in @lines
      match = line.match(LINE_SPLITTER_REGEX)
      continue unless match
      [fullLine, lineNumber, messageType, message] = match
      newEntry = {
        file: '',
        level: MESSAGE_LEVELS[messageType] || "INFO",
        message: message,
        line: '',
        raw: fullLine
      }
      # try extract file, line-number and the 'real' message from lines like:
      #   BibTeX subsystem: /.../original.bib_123.utf8, line 8, syntax error: it's bad
      lineMatch = newEntry.message.match(/^BibTeX subsystem: \/.+\/(\w+\.\w+)_.+, line (\d+), (.+)$/)
      if lineMatch && lineMatch.length == 4
        newEntry.file = lineMatch[1]
        newEntry.line = lineMatch[2]
        newEntry.message = lineMatch[3]
      result.all.push newEntry
      if newEntry.level == 'error'
        result.errors.push newEntry
      else if newEntry.level == 'warning'
        result.warnings.push newEntry
    return result

  # Looks at the first log line to decide between Biber and BibTeX parsing.
  # Throws an Error when the first line matches neither format.
  @parse = () ->
    firstLine = @lines[0]
    return @parseBiber() if firstLine.match(/^.*INFO - This is Biber.*$/)
    return @parseBibtex() if firstLine.match(/^This is BibTeX, Version.+$/)
    throw new Error("BibLogParser Error: cannot determine whether text is biber or bibtex output")
).call(BibLogParser.prototype)
# Convenience entry point: builds a parser for `text` and returns its parse() result.
BibLogParser.parse = (text, options) ->
  parser = new BibLogParser(text, options)
  return parser.parse()
return BibLogParser

View file

@ -1,290 +0,0 @@
define ->
# Define some constants
# A physical log line of exactly this length is treated as wrapped (see LogText).
LOG_WRAP_LIMIT = 79
# "LaTeX Warning: <text>". Captures: [1] the text.
LATEX_WARNING_REGEX = /^LaTeX Warning: (.*)$/
# Start of an Overfull/Underfull \hbox or \vbox report.
HBOX_WARNING_REGEX = /^(Over|Under)full \\(v|h)box/
# "Package <name> Warning:<text>". Captures: [1] the whole line.
PACKAGE_WARNING_REGEX = /^(Package \b.+\b Warning:.*)$/
# This is used to parse the line number from common latex warnings
LINES_REGEX = /lines? ([0-9]+)/
# This is used to parse the package name from the package warnings
PACKAGE_REGEX = /^Package (\b.+\b) Warning/
# Constructor: a cursor over the logical lines of a LaTeX log.
# Newlines are normalised to "\n", then physical lines that look wrapped are
# joined back together. Sets @text, @lines and @row (0).
LogText = (text) ->
  @text = text.replace(/(\r\n)|\r/g, '\n')
  physical = @text.split('\n')
  joined = [ physical[0] ]
  for idx in [1...physical.length] by 1
    previous = physical[idx - 1]
    # A preceding line of exactly LOG_WRAP_LIMIT characters is taken to have
    # wrapped onto this one, unless it ends in "..." (LaTeX's own marker that
    # it hit the limit), in which case the lines are kept apart.
    if previous.length != LOG_WRAP_LIMIT or previous.slice(-3) == '...'
      joined.push physical[idx]
    else
      joined[joined.length - 1] += physical[idx]
  @lines = joined
  @row = 0
  return
# Cursor methods, installed on LogText.prototype.
(->
  # Advances the cursor and returns the line there, or false past the end.
  @nextLine = () ->
    @row += 1
    if @row < @lines.length then @lines[@row] else false

  # Moves the cursor back by one line.
  @rewindLine = ->
    @row -= 1
    return

  # Reads lines up to and including the next empty or spaces-only line.
  @linesUpToNextWhitespaceLine = () ->
    @linesUpToNextMatchingLine /^ *$/

  # Reads lines up to and including the first one matching `match`, or to the
  # end of the log when none matches. Returns the lines read.
  @linesUpToNextMatchingLine = (match) ->
    collected = []
    loop
      current = @nextLine()
      break if current == false
      collected.push current
      break if current.match(match)
    collected

  return
).call(LogText.prototype)
# Parser modes: NORMAL scans for the start of a message; ERROR means an error
# line was just seen and its context still has to be collected.
state = { NORMAL: 0, ERROR: 1 }

# Constructor: prepares a parser for one LaTeX log.
#   text    - the log text, wrapped in a LogText
#   options - optional { fileBaseNames, ignoreDuplicates }
LatexParser = (text, options) ->
  settings = options || {}
  @log = new LogText(text)
  @state = state.NORMAL
  @fileBaseNames = settings.fileBaseNames || [/compiles/, /\/usr\/local/]
  @ignoreDuplicates = settings.ignoreDuplicates
  @data = []
  @fileStack = []
  # Both names start out pointing at the same (root) list.
  @rootFileList = []
  @currentFileList = @rootFileList
  @openParens = 0
  return
# Instance methods, installed on LatexParser.prototype.
(->
  # Escapes every regex metacharacter in `text` so that it is matched
  # literally when embedded in a dynamically built RegExp.
  escapeForRegExp = (text) ->
    text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

  # Walks the log line by line, dispatching on what each line looks like, and
  # returns the result of postProcess() over everything collected.
  @parse = () ->
    while (@currentLine = @log.nextLine()) != false
      if @state == state.NORMAL
        if @currentLineIsError()
          @state = state.ERROR
          @currentError =
            line: null
            file: @currentFilePath
            level: 'error'
            # Drop the first two characters (the "!" marker and the one after it).
            message: @currentLine.slice(2)
            content: ''
            raw: @currentLine + '\n'
        else if @currentLineIsRunawayArgument()
          @parseRunawayArgumentError()
        else if @currentLineIsWarning()
          @parseSingleWarningLine LATEX_WARNING_REGEX
        else if @currentLineIsHboxWarning()
          @parseHboxLine()
        else if @currentLineIsPackageWarning()
          @parseMultipleWarningLine()
        else
          @parseParensForFilenames()
      if @state == state.ERROR
        # Context of the error: everything up to the "l.<n>" line, followed
        # by the next two runs of lines ending at a blank line.
        @currentError.content += @log.linesUpToNextMatchingLine(/^l\.[0-9]+/).join('\n')
        @currentError.content += '\n'
        @currentError.content += @log.linesUpToNextWhitespaceLine().join('\n')
        @currentError.content += '\n'
        @currentError.content += @log.linesUpToNextWhitespaceLine().join('\n')
        @currentError.raw += @currentError.content
        lineNo = @currentError.raw.match(/l\.([0-9]+)/)
        if lineNo
          @currentError.line = parseInt(lineNo[1], 10)
        @data.push @currentError
        @state = state.NORMAL
    @postProcess @data

  # True when the current line starts with "!".
  @currentLineIsError = ->
    @currentLine[0] == '!'

  # Match result (truthy) when the current line starts with "Runaway argument".
  @currentLineIsRunawayArgument = ->
    @currentLine.match(/^Runaway argument/)

  @currentLineIsWarning = ->
    !!@currentLine.match(LATEX_WARNING_REGEX)

  @currentLineIsPackageWarning = ->
    !!@currentLine.match(PACKAGE_WARNING_REGEX)

  @currentLineIsHboxWarning = ->
    !!@currentLine.match(HBOX_WARNING_REGEX)

  # Records a "Runaway argument" error together with the next two runs of
  # lines ending at a blank line.
  @parseRunawayArgumentError = ->
    @currentError =
      line: null
      file: @currentFilePath
      level: 'error'
      message: @currentLine
      content: ''
      raw: @currentLine + '\n'
    @currentError.content += @log.linesUpToNextWhitespaceLine().join('\n')
    @currentError.content += '\n'
    @currentError.content += @log.linesUpToNextWhitespaceLine().join('\n')
    @currentError.raw += @currentError.content
    lineNo = @currentError.raw.match(/l\.([0-9]+)/)
    if lineNo
      @currentError.line = parseInt(lineNo[1], 10)
    @data.push @currentError

  # Records a one-line warning; `prefix_regex` must capture the warning text
  # in group 1. Does nothing when the current line does not match.
  @parseSingleWarningLine = (prefix_regex) ->
    warningMatch = @currentLine.match(prefix_regex)
    if !warningMatch
      return
    warning = warningMatch[1]
    lineMatch = warning.match(LINES_REGEX)
    line = if lineMatch then parseInt(lineMatch[1], 10) else null
    @data.push
      line: line
      file: @currentFilePath
      level: 'warning'
      message: warning
      raw: warning
    return

  # Records a package warning that may continue over several lines.
  @parseMultipleWarningLine = ->
    # Some package warnings are multiple lines, let's parse the first line
    warningMatch = @currentLine.match(PACKAGE_WARNING_REGEX)
    if !warningMatch
      # Something strange happened, return early
      return
    warning_lines = [ warningMatch[1] ]
    lineMatch = @currentLine.match(LINES_REGEX)
    line = if lineMatch then parseInt(lineMatch[1], 10) else null
    packageMatch = @currentLine.match(PACKAGE_REGEX)
    packageName = packageMatch[1]
    # Regex to get rid of the unnecessary (packagename) prefix in most
    # multi-line warnings. The name is escaped first: it comes straight from
    # the log, and an unescaped metacharacter (e.g. an unbalanced parenthesis)
    # would make RegExp throw.
    prefixRegex = new RegExp('(?:\\(' + escapeForRegExp(packageName) + '\\))*[\\s]*(.*)', 'i')
    # After every warning message there's a blank line, let's use it
    while !!(@currentLine = @log.nextLine())
      lineMatch = @currentLine.match(LINES_REGEX)
      line = if lineMatch then parseInt(lineMatch[1], 10) else line
      warningMatch = @currentLine.match(prefixRegex)
      warning_lines.push warningMatch[1]
    raw_message = warning_lines.join(' ')
    @data.push
      line: line
      file: @currentFilePath
      level: 'warning'
      message: raw_message
      raw: raw_message
    return

  # Records an Overfull/Underfull box report as a 'typesetting' entry.
  @parseHboxLine = ->
    lineMatch = @currentLine.match(LINES_REGEX)
    line = if lineMatch then parseInt(lineMatch[1], 10) else null
    @data.push
      line: line
      file: @currentFilePath
      level: 'typesetting'
      message: @currentLine
      raw: @currentLine
    return

  # Check if we're entering or leaving a new file in this line
  @parseParensForFilenames = ->
    pos = @currentLine.search(/\(|\)/)
    if pos != -1
      token = @currentLine[pos]
      @currentLine = @currentLine.slice(pos + 1)
      if token == '('
        filePath = @consumeFilePath()
        if filePath
          @currentFilePath = filePath
          newFile =
            path: filePath
            files: []
          @fileStack.push newFile
          @currentFileList.push newFile
          @currentFileList = newFile.files
        else
          # A "(" that does not introduce a file: remember it so that its
          # matching ")" does not close a file.
          @openParens++
      else if token == ')'
        if @openParens > 0
          @openParens--
        else
          if @fileStack.length > 1
            @fileStack.pop()
            previousFile = @fileStack[@fileStack.length - 1]
            @currentFilePath = previousFile.path
            @currentFileList = previousFile.files
          # else {
          #   Something has gone wrong but all we can do now is ignore it :(
          # }
      # Process the rest of the line
      @parseParensForFilenames()
    return

  @consumeFilePath = ->
    # Our heuristic for detecting file names are rather crude
    # A file may not contain a space, or ) in it
    # To be a file path it must have at least one /
    if !@currentLine.match(/^\/?([^ \)]+\/)+/)
      return false
    endOfFilePath = @currentLine.search(RegExp(' |\\)'))
    path = undefined
    if endOfFilePath == -1
      path = @currentLine
      @currentLine = ''
    else
      path = @currentLine.slice(0, endOfFilePath)
      @currentLine = @currentLine.slice(endOfFilePath)
    path

  # Sorts collected entries by level and, when @ignoreDuplicates is set, drops
  # entries whose `raw` text has been seen before.
  @postProcess = (data) ->
    all = []
    errors = []
    warnings = []
    typesetting = []
    hashes = []
    hashEntry = (entry) ->
      entry.raw
    i = 0
    while i < data.length
      if (@ignoreDuplicates and hashes.indexOf(hashEntry(data[i])) > -1)
        i++
        continue
      if data[i].level == 'error'
        errors.push data[i]
      else if data[i].level == 'typesetting'
        typesetting.push data[i]
      else if data[i].level == 'warning'
        warnings.push data[i]
      all.push data[i]
      hashes.push hashEntry(data[i])
      i++
    return {
      errors: errors
      warnings: warnings
      typesetting: typesetting
      all: all
      files: @rootFileList
    }
).call(LatexParser.prototype)
# Convenience entry point: builds a parser for `text` and returns its parse() result.
LatexParser.parse = (text, options) ->
  parser = new LatexParser(text, options)
  return parser.parse()
LatexParser

View file

@ -0,0 +1,238 @@
define(function() {
// Matches one Biber log line. The match array is laid out as
// [fullLine, lineNumber, messageType, message]
// where lineNumber is the run of digits inside the leading square brackets.
const LINE_SPLITTER_REGEX = /^\[(\d+)].*>\s(INFO|WARN|ERROR)\s-\s(.*)$/;
// Biber level keyword -> level name stored on parsed entries.
const MESSAGE_LEVELS = {
"INFO": "info",
"WARN": "warning",
"ERROR": "error"
};
/**
 * Constructor: holds a bibtex/biber log ready for parsing.
 * Sets this.text (newlines normalised to "\n"), this.options and this.lines.
 *
 * @param {string} text - the complete log; anything but a string throws.
 * @param {Object} [options] - settings object, stored as-is (defaults to {}).
 * @throws {Error} when `text` is not a string.
 */
const BibLogParser = function(text, options) {
  if (typeof text !== 'string') {
    throw new Error("BibLogParser Error: text parameter must be a string");
  }
  this.text = text.replace(/(\r\n)|\r/g, '\n');
  this.options = options || {};
  // Split the normalised text, not the raw argument: otherwise CRLF logs keep
  // a trailing "\r" on every line and bare-CR logs are never split at all.
  this.lines = this.text.split('\n');
};
/**
 * Repeatedly applies `regex` to `logText`, turning each match into an entry
 * with `process(match)` and cutting the matched text (plus the one character
 * that follows it) out of the text before searching again.
 *
 * Once the entry limit is reached, the entry built from that last match is
 * rewritten into an "Over N ..." notice, placed at the front of the list, and
 * the search stops with no text left over.
 *
 * @param {string} logText - text to search
 * @param {RegExp} regex - applied with exec()
 * @param {function(Array): Object} process - builds an entry from a match
 * @returns {Array} [entries, remainingText]
 */
const consume = function(logText, regex, process) {
  // Too many log entries can cause browser crashes
  const entryLimit = 100;
  const entries = [];
  let remaining = logText;
  let found = regex.exec(remaining);
  for (let seen = 1; found; seen += 1) {
    const entry = process(found);
    if (seen >= entryLimit) {
      // Construct a too many files error from the last match
      const plural = entry.level + "s";
      entry.message = [
        "Over",
        entryLimit,
        plural,
        "returned. Download raw logs to see full list"
      ].join(" ");
      entry.line = undefined;
      entries.unshift(entry);
      return [entries, ""];
    }
    entries.push(entry);
    const before = found.input.slice(0, found.index);
    const after = found.input.slice(found.index + found[0].length + 1, found.input.length);
    remaining = before + after;
    found = regex.exec(remaining);
  }
  return [entries, remaining];
};
// "Warning--<message>" followed on the next line by "--line <n> of file <name>".
// Captures: [1] message, [2] line number, [3] file name.
const MULTILINE_WARNING_REGEX = /^Warning--(.+)\n--line (\d+) of file (.+)$/m;
// A lone "Warning--<message>" line. Captures: [1] message.
const SINGLELINE_WARNING_REGEX = /^Warning--(.+)$/m;
// "<message>---line <n> of file <name>", then any text (newlines included,
// matched lazily) up to a line reading "I'm skipping whatever remains of this entry".
// Captures: [1] message, [2] line number, [3] file name, [4] the text in between.
const MULTILINE_ERROR_REGEX = /^(.*)---line (\d+) of file (.*)\n([^]+?)\nI'm skipping whatever remains of this entry$/m;
// The two-line "A bad cross reference---entry ... refers to entry ..., which
// doesn't exist" message. Captures: [1] the whole message.
const BAD_CROSS_REFERENCE_REGEX = /^(A bad cross reference---entry ".+?"\nrefers to entry.+?, which doesn't exist)$/m;
// Same layout as MULTILINE_ERROR_REGEX, except that "---line" may start on the
// line after the message and the closing line ends with "of this command".
const MULTILINE_COMMAND_ERROR_REGEX = /^(.*)\n?---line (\d+) of file (.*)\n([^]+?)\nI'm skipping whatever remains of this command$/m;
// Errors hit in BST file have a slightly different format:
// "<message>" on one line, then "while executing---line <n> of file <name>".
// Captures: [1] message, [2] line number, [3] file name.
const BST_ERROR_REGEX = /^(.*?)\nwhile executing---line (\d+) of file (.*)/m;
// Each parser is a pair of [regex, processFunction], where processFunction
// describes how to transform the regex match into a log entry object.
// Warning parsers are applied in the order listed.
const warningParsers = [
  [
    // Warning that carries a line number and file name.
    MULTILINE_WARNING_REGEX,
    (found) => ({
      file: found[3],
      level: "warning",
      message: found[1],
      line: found[2],
      raw: found[0]
    })
  ],
  [
    // Warning without any location information.
    SINGLELINE_WARNING_REGEX,
    (found) => ({
      file: '',
      level: "warning",
      message: found[1],
      line: '',
      raw: found[0]
    })
  ]
];
// BibTeX error parsers, applied in the order listed. Each item is a pair
// [regex, processFunction] like the warning parsers.
const errorParsers = [
  [
    // Error inside a database entry: message, location, then detail text.
    MULTILINE_ERROR_REGEX,
    (found) => ({
      file: found[3],
      level: "error",
      message: found[1] + '\n' + found[4],
      line: found[2],
      raw: found[0]
    })
  ],
  [
    // Bad cross reference: no location available.
    BAD_CROSS_REFERENCE_REGEX,
    (found) => ({
      file: '',
      level: "error",
      message: found[1],
      line: '',
      raw: found[0]
    })
  ],
  [
    // Error inside a command: same capture layout as the entry error.
    MULTILINE_COMMAND_ERROR_REGEX,
    (found) => ({
      file: found[3],
      level: "error",
      message: found[1] + '\n' + found[4],
      line: found[2],
      raw: found[0]
    })
  ],
  [
    // Error raised while executing the BST file: only one message part.
    BST_ERROR_REGEX,
    (found) => ({
      file: found[3],
      level: "error",
      message: found[1],
      line: found[2],
      raw: found[0]
    })
  ]
];
// Instance methods, installed on BibLogParser.prototype.
(function() {
  // Parses BibTeX output. The warning parsers run first over this.text; the
  // error parsers then run over whatever text the warning parsers left behind.
  // Returns { all, errors, warnings, files, typesetting }.
  this.parseBibtex = function() {
    // Feeds `text` through each [regex, process] pair in turn; every pass
    // sees only what the previous pass left over.
    const runParsers = (parsers, text) => {
      let entries = [];
      let rest = text;
      for (const [regex, process] of parsers) {
        const [found, leftover] = consume(rest, regex, process);
        entries = entries.concat(found);
        rest = leftover;
      }
      return [entries, rest];
    };
    const [warnings, afterWarnings] = runParsers(warningParsers, this.text);
    const [errors] = runParsers(errorParsers, afterWarnings);
    return {
      all: warnings.concat(errors),
      errors,
      warnings,
      files: [], // not used
      typesetting: [] // not used
    };
  };

  // Parses Biber output one line at a time; lines that do not match
  // LINE_SPLITTER_REGEX are ignored.
  // Returns { all, errors, warnings, files, typesetting }.
  this.parseBiber = function() {
    const report = {
      all: [],
      errors: [],
      warnings: [],
      files: [], // not used
      typesetting: [] // not used
    };
    for (const line of this.lines) {
      const parts = line.match(LINE_SPLITTER_REGEX);
      if (!parts) {
        continue;
      }
      const entry = {
        file: '',
        level: MESSAGE_LEVELS[parts[2]] || "INFO",
        message: parts[3],
        line: '',
        raw: parts[0]
      };
      // try extract file, line-number and the 'real' message from lines like:
      //   BibTeX subsystem: /.../original.bib_123.utf8, line 8, syntax error: it's bad
      const located = entry.message.match(/^BibTeX subsystem: \/.+\/(\w+\.\w+)_.+, line (\d+), (.+)$/);
      if (located && (located.length === 4)) {
        entry.file = located[1];
        entry.line = located[2];
        entry.message = located[3];
      }
      report.all.push(entry);
      if (entry.level === 'error') {
        report.errors.push(entry);
      } else if (entry.level === 'warning') {
        report.warnings.push(entry);
      }
    }
    return report;
  };

  // Looks at the first log line to decide between Biber and BibTeX parsing.
  // Throws an Error when the first line matches neither format.
  return this.parse = function() {
    const header = this.lines[0];
    if (header.match(/^.*INFO - This is Biber.*$/)) {
      return this.parseBiber();
    }
    if (header.match(/^This is BibTeX, Version.+$/)) {
      return this.parseBibtex();
    }
    throw new Error("BibLogParser Error: cannot determine whether text is biber or bibtex output");
  };
}).call(BibLogParser.prototype);
BibLogParser.parse = (text, options) => new BibLogParser(text, options).parse();
return BibLogParser;
});

View file

@ -0,0 +1,332 @@
define(function() {
// Constants shared by LogText and LatexParser below.
// Line length at which a log line is treated as wrapped and re-joined with the next one.
const LOG_WRAP_LIMIT = 79;
// Captures the message text that follows the "LaTeX Warning: " prefix.
const LATEX_WARNING_REGEX = /^LaTeX Warning: (.*)$/;
// Detects lines starting with "Overfull"/"Underfull" followed by \hbox or \vbox.
const HBOX_WARNING_REGEX = /^(Over|Under)full \\(v|h)box/;
// Captures an entire "Package <name> Warning: ..." line.
const PACKAGE_WARNING_REGEX = /^(Package \b.+\b Warning:.*)$/;
// This is used to parse the line number from common latex warnings
const LINES_REGEX = /lines? ([0-9]+)/;
// This is used to parse the package name from the package warnings
const PACKAGE_REGEX = /^Package (\b.+\b) Warning/;
/**
 * Line cursor over a LaTeX log.
 * Normalises line endings to "\n", re-joins lines that look wrapped, and
 * exposes the result as `this.lines` with a read position in `this.row`.
 * @param {string} text - raw log text
 */
const LogText = function(text) {
  this.text = text.replace(/(\r\n)|\r/g, '\n');
  const physicalLines = this.text.split('\n');
  // A line is appended to the previous logical line when the previous
  // physical line is exactly LOG_WRAP_LIMIT long and does not end in "..."
  // (LaTeX writes "..." when it truncated the line itself).
  this.lines = physicalLines.reduce((joined, current, index) => {
    const previous = physicalLines[index - 1];
    const continuesPrevious =
      index > 0 &&
      previous.length === LOG_WRAP_LIMIT &&
      previous.slice(-3) !== '...';
    if (continuesPrevious) {
      joined[joined.length - 1] += current;
    } else {
      joined.push(current);
    }
    return joined;
  }, []);
  this.row = 0;
};
Object.assign(LogText.prototype, {
  // Advances the cursor and returns the line there, or false past the end.
  nextLine() {
    this.row += 1;
    return this.row < this.lines.length ? this.lines[this.row] : false;
  },
  // Steps the cursor back by one line.
  rewindLine() {
    this.row -= 1;
  },
  // Reads lines up to and including the next line made only of spaces.
  linesUpToNextWhitespaceLine() {
    return this.linesUpToNextMatchingLine(/^ *$/);
  },
  // Reads lines up to and including the first one matching `match`,
  // stopping early when the log runs out.
  linesUpToNextMatchingLine(match) {
    const collected = [];
    for (;;) {
      const candidate = this.nextLine();
      if (candidate === false) {
        break;
      }
      collected.push(candidate);
      if (candidate.match(match)) {
        break;
      }
    }
    return collected;
  },
});
// Parser modes: NORMAL classifies each incoming line; ERROR means an error
// header was just seen and its body still has to be collected.
const state = { NORMAL: 0, ERROR: 1 };
/**
 * Creates a parser for one LaTeX log.
 * @param {string} text - raw log text, wrapped in a LogText cursor
 * @param {Object} [options]
 * @param {RegExp[]} [options.fileBaseNames] - stored on the instance;
 *   defaults to [/compiles/, /\/usr\/local/]
 * @param {boolean} [options.ignoreDuplicates] - stored on the instance
 */
const LatexParser = function(text, options) {
  const settings = options || {};
  const defaultBaseNames = [/compiles/, /\/usr\/local/];

  this.log = new LogText(text);
  this.state = state.NORMAL;
  this.fileBaseNames = settings.fileBaseNames || defaultBaseNames;
  this.ignoreDuplicates = settings.ignoreDuplicates;
  // Collected entries (errors, warnings, typesetting notes).
  this.data = [];
  // Files currently open, innermost last.
  this.fileStack = [];
  // The root list and the "current" list start out as the same array.
  this.rootFileList = [];
  this.currentFileList = this.rootFileList;
  this.openParens = 0;
};
// Parsing methods for LatexParser plus the static `LatexParser.parse`
// shortcut. `parse()` walks the log line by line, collects entries in
// `this.data`, tracks opened files via parentheses, and returns the result
// of `postProcess`.
(function() {
  // Matches the next opening or closing parenthesis.
  const PAREN_REGEX = /\(|\)/;

  // Escapes regex metacharacters so arbitrary text can be embedded in a RegExp.
  const escapeRegExp = text => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Returns the number from the first "l.<digits>" marker in `raw`, or null.
  const sourceLineFrom = raw => {
    const found = raw.match(/l\.([0-9]+)/);
    return found ? parseInt(found[1], 10) : null;
  };

  // Returns the number after the first "line"/"lines" in `text`, or `fallback`.
  const linesNumberFrom = (text, fallback) => {
    const found = text.match(LINES_REGEX);
    return found ? parseInt(found[1], 10) : fallback;
  };

  /**
   * Parses the whole log.
   * @returns {{errors: Array, warnings: Array, typesetting: Array, all: Array, files: Array}}
   */
  this.parse = function() {
    while ((this.currentLine = this.log.nextLine()) !== false) {
      if (this.state === state.NORMAL) {
        if (this.currentLineIsError()) {
          this.state = state.ERROR;
          this.currentError = {
            line: null,
            file: this.currentFilePath,
            level: 'error',
            // Drop the leading "! " of the error header.
            message: this.currentLine.slice(2),
            content: '',
            raw: this.currentLine + '\n',
          };
        } else if (this.currentLineIsRunawayArgument()) {
          this.parseRunawayArgumentError();
        } else if (this.currentLineIsWarning()) {
          this.parseSingleWarningLine(LATEX_WARNING_REGEX);
        } else if (this.currentLineIsHboxWarning()) {
          this.parseHboxLine();
        } else if (this.currentLineIsPackageWarning()) {
          this.parseMultipleWarningLine();
        } else {
          this.parseParensForFilenames();
        }
      }
      if (this.state === state.ERROR) {
        // The error body is three chunks: everything up to the "l.<n>" line,
        // then two runs of lines each ending at a blank line.
        this.currentError.content += [
          this.log.linesUpToNextMatchingLine(/^l\.[0-9]+/).join('\n'),
          this.log.linesUpToNextWhitespaceLine().join('\n'),
          this.log.linesUpToNextWhitespaceLine().join('\n'),
        ].join('\n');
        this.currentError.raw += this.currentError.content;
        this.currentError.line = sourceLineFrom(this.currentError.raw);
        this.data.push(this.currentError);
        this.state = state.NORMAL;
      }
    }
    return this.postProcess(this.data);
  };

  // True when the current line starts with "!".
  this.currentLineIsError = function() {
    return this.currentLine[0] === '!';
  };

  // Truthy (a match array) when the current line starts with "Runaway argument".
  this.currentLineIsRunawayArgument = function() {
    return this.currentLine.match(/^Runaway argument/);
  };

  // True when the current line matches LATEX_WARNING_REGEX.
  this.currentLineIsWarning = function() {
    return LATEX_WARNING_REGEX.test(this.currentLine);
  };

  // True when the current line matches PACKAGE_WARNING_REGEX.
  this.currentLineIsPackageWarning = function() {
    return PACKAGE_WARNING_REGEX.test(this.currentLine);
  };

  // True when the current line matches HBOX_WARNING_REGEX.
  this.currentLineIsHboxWarning = function() {
    return HBOX_WARNING_REGEX.test(this.currentLine);
  };

  /**
   * Records a "Runaway argument" error whose body is the next two runs of
   * lines, each ending at a blank line.
   * @returns {number} new length of `this.data`
   */
  this.parseRunawayArgumentError = function() {
    this.currentError = {
      line: null,
      file: this.currentFilePath,
      level: 'error',
      message: this.currentLine,
      content: '',
      raw: this.currentLine + '\n',
    };
    this.currentError.content += [
      this.log.linesUpToNextWhitespaceLine().join('\n'),
      this.log.linesUpToNextWhitespaceLine().join('\n'),
    ].join('\n');
    this.currentError.raw += this.currentError.content;
    this.currentError.line = sourceLineFrom(this.currentError.raw);
    return this.data.push(this.currentError);
  };

  /**
   * Records a one-line warning; the message is capture group 1 of
   * `prefix_regex`. Does nothing when the current line does not match.
   */
  this.parseSingleWarningLine = function(prefix_regex) {
    const warningMatch = this.currentLine.match(prefix_regex);
    if (!warningMatch) {
      return;
    }
    const warning = warningMatch[1];
    this.data.push({
      line: linesNumberFrom(warning, null),
      file: this.currentFilePath,
      level: 'warning',
      message: warning,
      raw: warning,
    });
  };

  /**
   * Records a package warning that may span several lines. Continuation
   * lines are read until a blank line (or the end of the log) and joined
   * with spaces after stripping their "(packagename)" prefix.
   */
  this.parseMultipleWarningLine = function() {
    const firstLineMatch = this.currentLine.match(PACKAGE_WARNING_REGEX);
    if (!firstLineMatch) {
      // Something strange happened, return early
      return;
    }
    const warningLines = [firstLineMatch[1]];
    let line = linesNumberFrom(this.currentLine, null);
    const packageName = this.currentLine.match(PACKAGE_REGEX)[1];
    // Regex to get rid of the unnecessary (packagename) prefix in most
    // multi-line warnings. The name is escaped because it comes straight
    // from the log and may contain regex metacharacters such as "(" or "+".
    const prefixRegex = new RegExp(
      '(?:\\(' + escapeRegExp(packageName) + '\\))*[\\s]*(.*)',
      'i'
    );
    // After every warning message there's a blank line, let's use it
    while ((this.currentLine = this.log.nextLine())) {
      // The last line number mentioned anywhere in the warning wins.
      line = linesNumberFrom(this.currentLine, line);
      warningLines.push(this.currentLine.match(prefixRegex)[1]);
    }
    const rawMessage = warningLines.join(' ');
    this.data.push({
      line,
      file: this.currentFilePath,
      level: 'warning',
      message: rawMessage,
      raw: rawMessage,
    });
  };

  // Records an over/underfull box report as a 'typesetting' entry.
  this.parseHboxLine = function() {
    this.data.push({
      line: linesNumberFrom(this.currentLine, null),
      file: this.currentFilePath,
      level: 'typesetting',
      message: this.currentLine,
      raw: this.currentLine,
    });
  };

  /**
   * Check if we're entering or leaving a file in this line.
   * Consumes `this.currentLine` one parenthesis at a time. Implemented as a
   * loop rather than recursion so that lines with very many parentheses
   * cannot exhaust the call stack.
   */
  this.parseParensForFilenames = function() {
    let pos = this.currentLine.search(PAREN_REGEX);
    while (pos !== -1) {
      const token = this.currentLine[pos];
      this.currentLine = this.currentLine.slice(pos + 1);
      if (token === '(') {
        const filePath = this.consumeFilePath();
        if (filePath) {
          this.currentFilePath = filePath;
          const newFile = { path: filePath, files: [] };
          this.fileStack.push(newFile);
          this.currentFileList.push(newFile);
          this.currentFileList = newFile.files;
        } else {
          // A "(" that does not introduce a file: remember it so that its
          // matching ")" is not mistaken for the end of a file.
          this.openParens++;
        }
      } else if (this.openParens > 0) {
        this.openParens--;
      } else if (this.fileStack.length > 1) {
        // Leave the innermost file; the outermost file is never popped.
        this.fileStack.pop();
        const previousFile = this.fileStack[this.fileStack.length - 1];
        this.currentFilePath = previousFile.path;
        this.currentFileList = previousFile.files;
      }
      // Process the rest of the line
      pos = this.currentLine.search(PAREN_REGEX);
    }
  };

  /**
   * Tries to read a file path from the start of `this.currentLine`.
   * @returns {string|false} the path (removed from the line), or false
   */
  this.consumeFilePath = function() {
    // Our heuristic for detecting file names are rather crude
    // A file may not contain a space, or ) in it
    // To be a file path it must have at least one /
    if (!this.currentLine.match(/^\/?([^ \)]+\/)+/)) {
      return false;
    }
    const endOfFilePath = this.currentLine.search(/ |\)/);
    if (endOfFilePath === -1) {
      const wholeLine = this.currentLine;
      this.currentLine = '';
      return wholeLine;
    }
    const path = this.currentLine.slice(0, endOfFilePath);
    this.currentLine = this.currentLine.slice(endOfFilePath);
    return path;
  };

  /**
   * Splits entries by level and, when `this.ignoreDuplicates` is set, drops
   * entries whose `raw` text was already seen.
   * @param {Array} data - entries with `level` and `raw` properties
   * @returns {{errors: Array, warnings: Array, typesetting: Array, all: Array, files: Array}}
   */
  this.postProcess = function(data) {
    const all = [];
    const byLevel = { error: [], warning: [], typesetting: [] };
    // A Set keeps the duplicate check O(1) per entry.
    const seenRaw = new Set();
    for (const entry of data) {
      if (this.ignoreDuplicates && seenRaw.has(entry.raw)) {
        continue;
      }
      if (Object.prototype.hasOwnProperty.call(byLevel, entry.level)) {
        byLevel[entry.level].push(entry);
      }
      all.push(entry);
      seenRaw.add(entry.raw);
    }
    return {
      errors: byLevel.error,
      warnings: byLevel.warning,
      typesetting: byLevel.typesetting,
      all,
      files: this.rootFileList,
    };
  };
}).call(LatexParser.prototype);
// Convenience wrapper: construct a parser and parse in one call.
LatexParser.parse = (text, options) => new LatexParser(text, options).parse();
return LatexParser;
});

View file

@ -1,6 +1,6 @@
define([
"../dist/latex-log-parser",
"../dist/bib-log-parser",
"../src/js/latex-log-parser",
"../src/js/bib-log-parser",
"text!logs/errors.log",
"text!logs/warnings.log",
"text!logs/bad-boxes.log",
@ -182,17 +182,17 @@ function(LatexParser, BibLogParser, errorLog, warningLog, badBoxesLog,
}
}
});
module("Runaway Arguments");
test("Runaway Arguments parsing", function() {
var errors = LatexParser.parse(runawayArgumentsLog).errors;
var expectedErrors = [
[null, "Runaway argument?", "/compile/runaway_argument.tex"] + "",
[null, "Emergency stop.", "/compile/runaway_argument.tex"] + ""
];
expect(expectedErrors.length);
for (var i = 0; i < errors.length; i++) {
if (expectedErrors.indexOf([errors[i].line, errors[i].message, errors[i].file] + "") > -1) {
@ -202,7 +202,7 @@ function(LatexParser, BibLogParser, errorLog, warningLog, badBoxesLog,
}
}
});
module("General");
test("Ignore Duplicates", function() {