overleaf/services/clsi/app/coffee/CompileManager.coffee

ResourceWriter = require "./ResourceWriter"
LatexRunner = require "./LatexRunner"
OutputFileFinder = require "./OutputFileFinder"
OutputCacheManager = require "./OutputCacheManager"
Settings = require("settings-sharelatex")
Path = require "path"
logger = require "logger-sharelatex"
Metrics = require "./Metrics"
child_process = require "child_process"
DraftModeManager = require "./DraftModeManager"
TikzManager = require "./TikzManager"
LockManager = require "./LockManager"
fs = require("fs")
fse = require "fs-extra"
os = require("os")
async = require "async"
Errors = require './Errors'
commandRunner = Settings.clsi?.commandRunner or "./CommandRunner"
logger.info commandRunner:commandRunner, "selecting command runner for clsi"
CommandRunner = require(commandRunner)
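# Helpers: per-user compiles are keyed as "<project_id>-<user_id>"; anonymous
# compiles fall back to the project_id alone. The compile directory lives under
# Settings.path.compilesDir.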
getCompileName = (project_id, user_id) ->
if user_id? then "#{project_id}-#{user_id}" else project_id
getCompileDir = (project_id, user_id) ->
Path.join(Settings.path.compilesDir, getCompileName(project_id, user_id))
module.exports = CompileManager =
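# Public entry point: ensure the compile directory exists, then serialise
# compiles for the same project/user with a lock file before running doCompile.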
doCompileWithLock: (request, callback = (error, outputFiles) ->) ->
compileDir = getCompileDir(request.project_id, request.user_id)
lockFile = Path.join(compileDir, ".project-lock")
# use a .project-lock file in the compile directory to prevent
# simultaneous compiles
fse.ensureDir compileDir, (error) ->
return callback(error) if error?
LockManager.runWithLock lockFile, (releaseLock) ->
CompileManager.doCompile(request, releaseLock)
, callback
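# Fields read from the request below include: project_id, user_id (optional),
# rootResourcePath, compiler, timeout, imageName, draft and check
# ('validate' or 'error'). This list is inferred from usage in this method.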
doCompile: (request, callback = (error, outputFiles) ->) ->
compileDir = getCompileDir(request.project_id, request.user_id)
console.log("doCompile",compileDir )
timer = new Metrics.Timer("write-to-disk")
logger.log project_id: request.project_id, user_id: request.user_id, "syncing resources to disk"
ResourceWriter.syncResourcesToDisk request, compileDir, (error, resourceList) ->
# NOTE: resourceList is insecure, it should only be used to exclude files from the output list
if error? and error instanceof Errors.FilesOutOfSyncError
logger.warn project_id: request.project_id, user_id: request.user_id, "files out of sync, please retry"
return callback(error)
else if error?
logger.err err:error, project_id: request.project_id, user_id: request.user_id, "error writing resources to disk"
return callback(error)
logger.log project_id: request.project_id, user_id: request.user_id, time_taken: Date.now() - timer.start, "written files to disk"
timer.done()
injectDraftModeIfRequired = (callback) ->
if request.draft
DraftModeManager.injectDraftMode Path.join(compileDir, request.rootResourcePath), callback
else
callback()
createTikzFileIfRequired = (callback) ->
TikzManager.checkMainFile compileDir, request.rootResourcePath, resourceList, (error, usesTikzExternalize) ->
return callback(error) if error?
if usesTikzExternalize
TikzManager.injectOutputFile compileDir, request.rootResourcePath, callback
else
callback()
# set up environment variables for chktex
env = {}
# only run chktex on LaTeX files (not knitr .Rtex files or any others)
isLaTeXFile = request.rootResourcePath?.match(/\.tex$/i)
if request.check? and isLaTeXFile
env['CHKTEX_OPTIONS'] = '-nall -e9 -e10 -w15 -w16'
env['CHKTEX_ULIMIT_OPTIONS'] = '-t 5 -v 64000'
if request.check is 'error'
env['CHKTEX_EXIT_ON_ERROR'] = 1
if request.check is 'validate'
env['CHKTEX_VALIDATE'] = 1
# apply a series of file modifications/creations for draft mode and tikz
async.series [injectDraftModeIfRequired, createTikzFileIfRequired], (error) ->
return callback(error) if error?
timer = new Metrics.Timer("run-compile")
# find the image tag to log it as a metric, e.g. 2015.1 (convert . to - for graphite)
tag = request.imageName?.match(/:(.*)/)?[1]?.replace(/\./g,'-') or "default"
tag = "other" if not request.project_id.match(/^[0-9a-f]{24}$/) # exclude smoke test
Metrics.inc("compiles")
Metrics.inc("compiles-with-image.#{tag}")
compileName = getCompileName(request.project_id, request.user_id)
LatexRunner.runLatex compileName, {
directory: compileDir
mainFile: request.rootResourcePath
compiler: request.compiler
timeout: request.timeout
image: request.imageName
environment: env
}, (error, output, stats, timings) ->
# request was for validation only
if request.check is "validate"
result = if error?.code then "fail" else "pass"
error = new Error("validation")
error.validate = result
# request was for compile, and failed on validation
if request.check is "error" and error?.message is 'exited'
error = new Error("compilation")
error.validate = "fail"
# compile was killed by user, was a validation, or a compile which failed validation
if error?.terminated or error?.validate
OutputFileFinder.findOutputFiles resourceList, compileDir, (err, outputFiles) ->
return callback(err) if err?
callback(error, outputFiles) # return output files so user can check logs
return
# compile completed normally
return callback(error) if error?
Metrics.inc("compiles-succeeded")
for metric_key, metric_value of stats or {}
Metrics.count(metric_key, metric_value)
for metric_key, metric_value of timings or {}
Metrics.timing(metric_key, metric_value)
loadavg = os.loadavg?()
Metrics.gauge("load-avg", loadavg[0]) if loadavg?
ts = timer.done()
logger.log {project_id: request.project_id, user_id: request.user_id, time_taken: ts, stats:stats, timings:timings, loadavg:loadavg}, "done compile"
if stats?["latex-runs"] > 0
Metrics.timing("run-compile-per-pass", ts / stats["latex-runs"])
if stats?["latex-runs"] > 0 and timings?["cpu-time"] > 0
Metrics.timing("run-compile-cpu-time-per-pass", timings["cpu-time"] / stats["latex-runs"])
OutputFileFinder.findOutputFiles resourceList, compileDir, (error, outputFiles) ->
return callback(error) if error?
OutputCacheManager.saveOutputFiles outputFiles, compileDir, (error, newOutputFiles) ->
callback null, newOutputFiles
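# Abort a running compile by killing the LaTeX process for this project/user.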
stopCompile: (project_id, user_id, callback = (error) ->) ->
compileName = getCompileName(project_id, user_id)
LatexRunner.killLatex compileName, callback
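# Delete the compile directory for a project/user. The callback is wrapped so it
# fires at most once, since both the "error" and "close" handlers may call it.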
clearProject: (project_id, user_id, _callback = (error) ->) ->
callback = (error) ->
_callback(error)
_callback = () ->
compileDir = getCompileDir(project_id, user_id)
CompileManager._checkDirectory compileDir, (err, exists) ->
return callback(err) if err?
return callback() if not exists # skip removal if no directory present
proc = child_process.spawn "rm", ["-r", compileDir]
proc.on "error", callback
stderr = ""
proc.stderr.on "data", (chunk) -> stderr += chunk.toString()
proc.on "close", (code) ->
if code == 0
return callback(null)
else
return callback(new Error("rm -r #{compileDir} failed: #{stderr}"))
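# List all compile directories currently under Settings.path.compilesDir.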
_findAllDirs: (callback = (error, allDirs) ->) ->
root = Settings.path.compilesDir
fs.readdir root, (err, files) ->
return callback(err) if err?
allDirs = (Path.join(root, file) for file in files)
callback(null, allDirs)
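# Remove compile directories that have not been modified within max_cache_age_ms.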
clearExpiredProjects: (max_cache_age_ms, callback = (error) ->) ->
now = Date.now()
# action for each directory
expireIfNeeded = (checkDir, cb) ->
fs.stat checkDir, (err, stats) ->
return cb() if err? # ignore errors checking directory
age = now - stats.mtime
hasExpired = (age > max_cache_age_ms)
if hasExpired then fse.remove(checkDir, cb) else cb()
# iterate over all project directories
CompileManager._findAllDirs (error, allDirs) ->
return callback() if error?
async.eachSeries allDirs, expireIfNeeded, callback
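# Confirm that compileDir exists and is actually a directory before removal.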
_checkDirectory: (compileDir, callback = (error, exists) ->) ->
fs.lstat compileDir, (err, stats) ->
if err?.code is 'ENOENT'
return callback(null, false) # directory does not exist
else if err?
logger.err {dir: compileDir, err:err}, "error on stat of project directory for removal"
return callback(err)
else if not stats?.isDirectory()
logger.err {dir: compileDir, stats:stats}, "bad project directory for removal"
return callback new Error("project directory is not directory")
else
callback(null, true) # directory exists
syncFromCode: (project_id, user_id, file_name, line, column, callback = (error, pdfPositions) ->) ->
# If LaTeX was run in a virtual environment, the file path that synctex expects
# might not match the file path on the host. The .synctex.gz file however, will be accessed
# wherever it is on the host.
compileName = getCompileName(project_id, user_id)
base_dir = Settings.path.synctexBaseDir(compileName)
file_path = base_dir + "/" + file_name
compileDir = getCompileDir(project_id, user_id)
synctex_path = Path.join(compileDir, "output.pdf")
command = ["code", synctex_path, file_path, line, column]
CompileManager._runSynctex command, (error, stdout) ->
return callback(error) if error?
if stdout.toLowerCase().indexOf("warning") == -1
logType = "log"
else
logType = "err"
logger[logType] project_id: project_id, user_id:user_id, file_name: file_name, line: line, column: column, command:command, stdout: stdout, "synctex code output"
callback null, CompileManager._parseSynctexFromCodeOutput(stdout)
syncFromPdf: (project_id, user_id, page, h, v, callback = (error, filePositions) ->) ->
compileName = getCompileName(project_id, user_id)
base_dir = Settings.path.synctexBaseDir(compileName)
compileDir = getCompileDir(project_id, user_id)
synctex_path = Path.join(compileDir, "output.pdf")
logger.log({base_dir, project_id, synctex_path}, "base diiir")
CompileManager._runSynctex ["pdf", synctex_path, page, h, v], (error, stdout) ->
return callback(error) if error?
logger.log project_id: project_id, user_id:user_id, page: page, h: h, v:v, stdout: stdout, "synctex pdf output"
callback null, CompileManager._parseSynctexFromPdfOutput(stdout, base_dir)
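# Sanity check before running synctex: the output directory and the
# output.synctex.gz file next to the given path must both exist.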
_checkFileExists: (path, callback = (error) ->) ->
synctexDir = Path.dirname(path)
synctexFile = Path.join(synctexDir, "output.synctex.gz")
fs.stat synctexDir, (error, stats) ->
if error?.code is 'ENOENT'
return callback(new Errors.NotFoundError("called synctex with no output directory"))
return callback(error) if error?
fs.stat synctexFile, (error, stats) ->
if error?.code is 'ENOENT'
return callback(new Errors.NotFoundError("called synctex with no output file"))
return callback(error) if error?
return callback(new Error("not a file")) if not stats?.isFile()
callback()
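# Run the bundled bin/synctex binary with a 10 second timeout. An optional
# Settings.clsi.synctexCommandWrapper hook may rewrite the binary path and
# arguments before execution.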
_runSynctex: (args, callback = (error, stdout) ->) ->
bin_path = Path.resolve(__dirname + "/../../bin/synctex")
seconds = 1000
outputFilePath = args[1]
CompileManager._checkFileExists outputFilePath, (error) ->
return callback(error) if error?
if Settings.clsi?.synctexCommandWrapper?
[bin_path, args] = Settings.clsi?.synctexCommandWrapper bin_path, args
logger.log({bin_path, args}, "synctex being run")
child_process.execFile bin_path, args, timeout: 10 * seconds, (error, stdout, stderr) ->
if error?
logger.err err:error, args:args, "error running synctex"
return callback(error)
callback(null, stdout)
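# Parse "synctex code" output. Based on the parsing below, relevant lines are
# assumed to be tab-separated: NODE, page, h, v, width, height.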
_parseSynctexFromCodeOutput: (output) ->
results = []
for line in output.split("\n")
[node, page, h, v, width, height] = line.split("\t")
if node == "NODE"
results.push {
page: parseInt(page, 10)
h: parseFloat(h)
v: parseFloat(v)
height: parseFloat(height)
width: parseFloat(width)
}
return results
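# Parse "synctex pdf" output. Based on the parsing below, relevant lines are
# assumed to be tab-separated: NODE, file_path, line, column, with file paths
# returned relative to base_dir.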
_parseSynctexFromPdfOutput: (output, base_dir) ->
results = []
for line in output.split("\n")
[node, file_path, line, column] = line.split("\t")
if node == "NODE"
file = file_path.slice(base_dir.length + 1)
results.push {
file: file
line: parseInt(line, 10)
column: parseInt(column, 10)
}
return results
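# Run texcount on a file in the compile directory (through CommandRunner, so it
# uses the same image as compiles) and parse the generated "<file>.wc" report.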
wordcount: (project_id, user_id, file_name, image, callback = (error, results) ->) ->
logger.log project_id:project_id, user_id:user_id, file_name:file_name, image:image, "running wordcount"
file_path = "$COMPILE_DIR/" + file_name
command = [ "texcount", '-nocol', '-inc', file_path, "-out=" + file_path + ".wc"]
directory = getCompileDir(project_id, user_id)
timeout = 10 * 1000
compileName = getCompileName(project_id, user_id)
CommandRunner.run compileName, command, directory, image, timeout, {}, (error) ->
return callback(error) if error?
fs.readFile directory + "/" + file_name + ".wc", "utf-8", (err, stdout) ->
if err?
logger.err err:err, command:command, directory:directory, project_id:project_id, user_id:user_id, "error reading word count output"
return callback(err)
results = CompileManager._parseWordcountFromOutput(stdout)
logger.log project_id:project_id, user_id:user_id, wordcount: results, "word count results"
callback null, results
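# Translate the plain-text texcount report into a structured result; lines that
# match none of the patterns below are ignored.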
_parseWordcountFromOutput: (output) ->
results = {
encode: ""
textWords: 0
headWords: 0
outside: 0
headers: 0
elements: 0
mathInline: 0
mathDisplay: 0
errors: 0
messages: ""
}
for line in output.split("\n")
[data, info] = line.split(":")
if data.indexOf("Encoding") > -1
results['encode'] = info.trim()
if data.indexOf("in text") > -1
results['textWords'] = parseInt(info, 10)
if data.indexOf("in head") > -1
results['headWords'] = parseInt(info, 10)
if data.indexOf("outside") > -1
results['outside'] = parseInt(info, 10)
if data.indexOf("of head") > -1
results['headers'] = parseInt(info, 10)
if data.indexOf("Number of floats/tables/figures") > -1
results['elements'] = parseInt(info, 10)
if data.indexOf("Number of math inlines") > -1
results['mathInline'] = parseInt(info, 10)
if data.indexOf("Number of math displayed") > -1
results['mathDisplay'] = parseInt(info, 10)
if data is "(errors" # errors reported as (errors:123)
results['errors'] = parseInt(info, 10)
if line.indexOf("!!! ") > -1 # errors logged as !!! message !!!
results['messages'] += line + "\n"
return results