overleaf/services/clsi/app/coffee/CompileManager.coffee
2018-09-11 10:02:24 +01:00

342 lines
14 KiB
CoffeeScript

ResourceWriter = require "./ResourceWriter"
LatexRunner = require "./LatexRunner"
OutputFileFinder = require "./OutputFileFinder"
OutputCacheManager = require "./OutputCacheManager"
Settings = require("settings-sharelatex")
Path = require "path"
logger = require "logger-sharelatex"
Metrics = require "./Metrics"
child_process = require "child_process"
DraftModeManager = require "./DraftModeManager"
TikzManager = require "./TikzManager"
LockManager = require "./LockManager"
fs = require("fs")
fse = require "fs-extra"
os = require("os")
async = require "async"
Errors = require './Errors'
CommandRunner = require "./CommandRunner"
getCompileName = (project_id, user_id) ->
if user_id? then "#{project_id}-#{user_id}" else project_id
getCompileDir = (project_id, user_id) ->
Path.join(Settings.path.compilesDir, getCompileName(project_id, user_id))
module.exports = CompileManager =
doCompileWithLock: (request, callback = (error, outputFiles) ->) ->
compileDir = getCompileDir(request.project_id, request.user_id)
lockFile = Path.join(compileDir, ".project-lock")
# use a .project-lock file in the compile directory to prevent
# simultaneous compiles
fse.ensureDir compileDir, (error) ->
return callback(error) if error?
LockManager.runWithLock lockFile, (releaseLock) ->
CompileManager.doCompile(request, releaseLock)
, callback
doCompile: (request, callback = (error, outputFiles) ->) ->
compileDir = getCompileDir(request.project_id, request.user_id)
timer = new Metrics.Timer("write-to-disk")
logger.log project_id: request.project_id, user_id: request.user_id, "syncing resources to disk"
ResourceWriter.syncResourcesToDisk request, compileDir, (error, resourceList) ->
# NOTE: resourceList is insecure, it should only be used to exclude files from the output list
if error? and error instanceof Errors.FilesOutOfSyncError
logger.warn project_id: request.project_id, user_id: request.user_id, "files out of sync, please retry"
return callback(error)
else if error?
logger.err err:error, project_id: request.project_id, user_id: request.user_id, "error writing resources to disk"
return callback(error)
logger.log project_id: request.project_id, user_id: request.user_id, time_taken: Date.now() - timer.start, "written files to disk"
timer.done()
injectDraftModeIfRequired = (callback) ->
if request.draft
DraftModeManager.injectDraftMode Path.join(compileDir, request.rootResourcePath), callback
else
callback()
createTikzFileIfRequired = (callback) ->
TikzManager.checkMainFile compileDir, request.rootResourcePath, resourceList, (error, usesTikzExternalize) ->
return callback(error) if error?
if usesTikzExternalize
TikzManager.injectOutputFile compileDir, request.rootResourcePath, callback
else
callback()
# set up environment variables for chktex
env = {}
# only run chktex on LaTeX files (not knitr .Rtex files or any others)
isLaTeXFile = request.rootResourcePath?.match(/\.tex$/i)
if request.check? and isLaTeXFile
env['CHKTEX_OPTIONS'] = '-nall -e9 -e10 -w15 -w16'
env['CHKTEX_ULIMIT_OPTIONS'] = '-t 5 -v 64000'
if request.check is 'error'
env['CHKTEX_EXIT_ON_ERROR'] = 1
if request.check is 'validate'
env['CHKTEX_VALIDATE'] = 1
# apply a series of file modifications/creations for draft mode and tikz
async.series [injectDraftModeIfRequired, createTikzFileIfRequired], (error) ->
return callback(error) if error?
timer = new Metrics.Timer("run-compile")
# find the image tag to log it as a metric, e.g. 2015.1 (convert . to - for graphite)
tag = request.imageName?.match(/:(.*)/)?[1]?.replace(/\./g,'-') or "default"
tag = "other" if not request.project_id.match(/^[0-9a-f]{24}$/) # exclude smoke test
Metrics.inc("compiles")
Metrics.inc("compiles-with-image.#{tag}")
compileName = getCompileName(request.project_id, request.user_id)
LatexRunner.runLatex compileName, {
directory: compileDir
mainFile: request.rootResourcePath
compiler: request.compiler
timeout: request.timeout
image: request.imageName
environment: env
}, (error, output, stats, timings) ->
# request was for validation only
if request.check is "validate"
result = if error?.code then "fail" else "pass"
error = new Error("validation")
error.validate = result
# request was for compile, and failed on validation
if request.check is "error" and error?.message is 'exited'
error = new Error("compilation")
error.validate = "fail"
# compile was killed by user, was a validation, or a compile which failed validation
if error?.terminated or error?.validate
OutputFileFinder.findOutputFiles resourceList, compileDir, (err, outputFiles) ->
return callback(err) if err?
callback(error, outputFiles) # return output files so user can check logs
return
# compile completed normally
return callback(error) if error?
Metrics.inc("compiles-succeeded")
for metric_key, metric_value of stats or {}
Metrics.count(metric_key, metric_value)
for metric_key, metric_value of timings or {}
Metrics.timing(metric_key, metric_value)
loadavg = os.loadavg?()
Metrics.gauge("load-avg", loadavg[0]) if loadavg?
ts = timer.done()
logger.log {project_id: request.project_id, user_id: request.user_id, time_taken: ts, stats:stats, timings:timings, loadavg:loadavg}, "done compile"
if stats?["latex-runs"] > 0
Metrics.timing("run-compile-per-pass", ts / stats["latex-runs"])
if stats?["latex-runs"] > 0 and timings?["cpu-time"] > 0
Metrics.timing("run-compile-cpu-time-per-pass", timings["cpu-time"] / stats["latex-runs"])
OutputFileFinder.findOutputFiles resourceList, compileDir, (error, outputFiles) ->
return callback(error) if error?
OutputCacheManager.saveOutputFiles outputFiles, compileDir, (error, newOutputFiles) ->
callback null, newOutputFiles
stopCompile: (project_id, user_id, callback = (error) ->) ->
compileName = getCompileName(project_id, user_id)
LatexRunner.killLatex compileName, callback
clearProject: (project_id, user_id, _callback = (error) ->) ->
callback = (error) ->
_callback(error)
_callback = () ->
compileDir = getCompileDir(project_id, user_id)
CompileManager._checkDirectory compileDir, (err, exists) ->
return callback(err) if err?
return callback() if not exists # skip removal if no directory present
proc = child_process.spawn "rm", ["-r", compileDir]
proc.on "error", callback
stderr = ""
proc.stderr.on "data", (chunk) -> stderr += chunk.toString()
proc.on "close", (code) ->
if code == 0
return callback(null)
else
return callback(new Error("rm -r #{compileDir} failed: #{stderr}"))
_findAllDirs: (callback = (error, allDirs) ->) ->
root = Settings.path.compilesDir
fs.readdir root, (err, files) ->
return callback(err) if err?
allDirs = (Path.join(root, file) for file in files)
callback(null, allDirs)
clearExpiredProjects: (max_cache_age_ms, callback = (error) ->) ->
now = Date.now()
# action for each directory
expireIfNeeded = (checkDir, cb) ->
fs.stat checkDir, (err, stats) ->
return cb() if err? # ignore errors checking directory
age = now - stats.mtime
hasExpired = (age > max_cache_age_ms)
if hasExpired then fse.remove(checkDir, cb) else cb()
# iterate over all project directories
CompileManager._findAllDirs (error, allDirs) ->
return callback() if error?
async.eachSeries allDirs, expireIfNeeded, callback
_checkDirectory: (compileDir, callback = (error, exists) ->) ->
fs.lstat compileDir, (err, stats) ->
if err?.code is 'ENOENT'
return callback(null, false) # directory does not exist
else if err?
logger.err {dir: compileDir, err:err}, "error on stat of project directory for removal"
return callback(err)
else if not stats?.isDirectory()
logger.err {dir: compileDir, stats:stats}, "bad project directory for removal"
return callback new Error("project directory is not directory")
else
callback(null, true) # directory exists
syncFromCode: (project_id, user_id, file_name, line, column, callback = (error, pdfPositions) ->) ->
# If LaTeX was run in a virtual environment, the file path that synctex expects
# might not match the file path on the host. The .synctex.gz file however, will be accessed
# wherever it is on the host.
compileName = getCompileName(project_id, user_id)
base_dir = Settings.path.synctexBaseDir(compileName)
file_path = base_dir + "/" + file_name
compileDir = getCompileDir(project_id, user_id)
synctex_path = "#{base_dir}/output.pdf"
command = ["code", synctex_path, file_path, line, column]
fse.ensureDir compileDir, (error) ->
if error?
logger.err {error, project_id, user_id, file_name}, "error ensuring dir for sync from code"
return callback(error)
CompileManager._runSynctex project_id, user_id, command, (error, stdout) ->
return callback(error) if error?
logger.log project_id: project_id, user_id:user_id, file_name: file_name, line: line, column: column, command:command, stdout: stdout, "synctex code output"
callback null, CompileManager._parseSynctexFromCodeOutput(stdout)
syncFromPdf: (project_id, user_id, page, h, v, callback = (error, filePositions) ->) ->
compileName = getCompileName(project_id, user_id)
compileDir = getCompileDir(project_id, user_id)
base_dir = Settings.path.synctexBaseDir(compileName)
synctex_path = "#{base_dir}/output.pdf"
command = ["pdf", synctex_path, page, h, v]
fse.ensureDir compileDir, (error) ->
if error?
logger.err {error, project_id, user_id, file_name}, "error ensuring dir for sync to code"
return callback(error)
CompileManager._runSynctex project_id, user_id, command, (error, stdout) ->
return callback(error) if error?
logger.log project_id: project_id, user_id:user_id, page: page, h: h, v:v, stdout: stdout, "synctex pdf output"
callback null, CompileManager._parseSynctexFromPdfOutput(stdout, base_dir)
_checkFileExists: (path, callback = (error) ->) ->
synctexDir = Path.dirname(path)
synctexFile = Path.join(synctexDir, "output.synctex.gz")
fs.stat synctexDir, (error, stats) ->
if error?.code is 'ENOENT'
return callback(new Errors.NotFoundError("called synctex with no output directory"))
return callback(error) if error?
fs.stat synctexFile, (error, stats) ->
if error?.code is 'ENOENT'
return callback(new Errors.NotFoundError("called synctex with no output file"))
return callback(error) if error?
return callback(new Error("not a file")) if not stats?.isFile()
callback()
_runSynctex: (project_id, user_id, command, callback = (error, stdout) ->) ->
seconds = 1000
command.unshift("/opt/synctex")
directory = getCompileDir(project_id, user_id)
timeout = 60 * 1000 # increased to allow for large projects
compileName = getCompileName(project_id, user_id)
CommandRunner.run compileName, command, directory, Settings.clsi.docker.image, timeout, {}, (error, output) ->
if error?
logger.err err:error, command:command, project_id:project_id, user_id:user_id, "error running synctex"
return callback(error)
callback(null, output.stdout)
_parseSynctexFromCodeOutput: (output) ->
results = []
for line in output.split("\n")
[node, page, h, v, width, height] = line.split("\t")
if node == "NODE"
results.push {
page: parseInt(page, 10)
h: parseFloat(h)
v: parseFloat(v)
height: parseFloat(height)
width: parseFloat(width)
}
return results
_parseSynctexFromPdfOutput: (output, base_dir) ->
results = []
for line in output.split("\n")
[node, file_path, line, column] = line.split("\t")
if node == "NODE"
file = file_path.slice(base_dir.length + 1)
results.push {
file: file
line: parseInt(line, 10)
column: parseInt(column, 10)
}
return results
wordcount: (project_id, user_id, file_name, image, callback = (error, pdfPositions) ->) ->
logger.log project_id:project_id, user_id:user_id, file_name:file_name, image:image, "running wordcount"
file_path = "$COMPILE_DIR/" + file_name
command = [ "texcount", '-nocol', '-inc', file_path, "-out=" + file_path + ".wc"]
directory = getCompileDir(project_id, user_id)
timeout = 60 * 1000 # increased to allow for large projects
compileName = getCompileName(project_id, user_id)
fse.ensureDir compileDir, (error) ->
if error?
logger.err {error, project_id, user_id, file_name}, "error ensuring dir for sync from code"
return callback(error)
CommandRunner.run compileName, command, compileDir, image, timeout, {}, (error) ->
return callback(error) if error?
fs.readFile compileDir + "/" + file_name + ".wc", "utf-8", (err, stdout) ->
if err?
#call it node_err so sentry doesn't use random path error as unique id so it can't be ignored
logger.err node_err:err, command:command, compileDir:compileDir, project_id:project_id, user_id:user_id, "error reading word count output"
return callback(err)
results = CompileManager._parseWordcountFromOutput(stdout)
logger.log project_id:project_id, user_id:user_id, wordcount: results, "word count results"
callback null, results
_parseWordcountFromOutput: (output) ->
results = {
encode: ""
textWords: 0
headWords: 0
outside: 0
headers: 0
elements: 0
mathInline: 0
mathDisplay: 0
errors: 0
messages: ""
}
for line in output.split("\n")
[data, info] = line.split(":")
if data.indexOf("Encoding") > -1
results['encode'] = info.trim()
if data.indexOf("in text") > -1
results['textWords'] = parseInt(info, 10)
if data.indexOf("in head") > -1
results['headWords'] = parseInt(info, 10)
if data.indexOf("outside") > -1
results['outside'] = parseInt(info, 10)
if data.indexOf("of head") > -1
results['headers'] = parseInt(info, 10)
if data.indexOf("Number of floats/tables/figures") > -1
results['elements'] = parseInt(info, 10)
if data.indexOf("Number of math inlines") > -1
results['mathInline'] = parseInt(info, 10)
if data.indexOf("Number of math displayed") > -1
results['mathDisplay'] = parseInt(info, 10)
if data is "(errors" # errors reported as (errors:123)
results['errors'] = parseInt(info, 10)
if line.indexOf("!!! ") > -1 # errors logged as !!! message !!!
results['messages'] += line + "\n"
return results