overleaf/services/clsi/app/coffee/CompileManager.coffee
Henry Oswald 9a519f0d3d added docker runner into core codebase
supports both local command runner and docker runner

added docker files for tex live

also fixed tests so they exit correctly & removed debug lines
2018-03-14 15:44:49 +00:00

334 lines
14 KiB
CoffeeScript

ResourceWriter = require "./ResourceWriter"
LatexRunner = require "./LatexRunner"
OutputFileFinder = require "./OutputFileFinder"
OutputCacheManager = require "./OutputCacheManager"
Settings = require("settings-sharelatex")
Path = require "path"
logger = require "logger-sharelatex"
Metrics = require "./Metrics"
child_process = require "child_process"
DraftModeManager = require "./DraftModeManager"
TikzManager = require "./TikzManager"
LockManager = require "./LockManager"
fs = require("fs")
fse = require "fs-extra"
os = require("os")
async = require "async"
Errors = require './Errors'
CommandRunner = require "./CommandRunner"
getCompileName = (project_id, user_id) ->
if user_id? then "#{project_id}-#{user_id}" else project_id
getCompileDir = (project_id, user_id) ->
Path.join(Settings.path.compilesDir, getCompileName(project_id, user_id))
module.exports = CompileManager =
doCompileWithLock: (request, callback = (error, outputFiles) ->) ->
compileDir = getCompileDir(request.project_id, request.user_id)
lockFile = Path.join(compileDir, ".project-lock")
# use a .project-lock file in the compile directory to prevent
# simultaneous compiles
fse.ensureDir compileDir, (error) ->
return callback(error) if error?
LockManager.runWithLock lockFile, (releaseLock) ->
CompileManager.doCompile(request, releaseLock)
, callback
doCompile: (request, callback = (error, outputFiles) ->) ->
compileDir = getCompileDir(request.project_id, request.user_id)
timer = new Metrics.Timer("write-to-disk")
logger.log project_id: request.project_id, user_id: request.user_id, "syncing resources to disk"
ResourceWriter.syncResourcesToDisk request, compileDir, (error, resourceList) ->
# NOTE: resourceList is insecure, it should only be used to exclude files from the output list
if error? and error instanceof Errors.FilesOutOfSyncError
logger.warn project_id: request.project_id, user_id: request.user_id, "files out of sync, please retry"
return callback(error)
else if error?
logger.err err:error, project_id: request.project_id, user_id: request.user_id, "error writing resources to disk"
return callback(error)
logger.log project_id: request.project_id, user_id: request.user_id, time_taken: Date.now() - timer.start, "written files to disk"
timer.done()
injectDraftModeIfRequired = (callback) ->
if request.draft
DraftModeManager.injectDraftMode Path.join(compileDir, request.rootResourcePath), callback
else
callback()
createTikzFileIfRequired = (callback) ->
TikzManager.checkMainFile compileDir, request.rootResourcePath, resourceList, (error, usesTikzExternalize) ->
return callback(error) if error?
if usesTikzExternalize
TikzManager.injectOutputFile compileDir, request.rootResourcePath, callback
else
callback()
# set up environment variables for chktex
env = {}
# only run chktex on LaTeX files (not knitr .Rtex files or any others)
isLaTeXFile = request.rootResourcePath?.match(/\.tex$/i)
if request.check? and isLaTeXFile
env['CHKTEX_OPTIONS'] = '-nall -e9 -e10 -w15 -w16'
env['CHKTEX_ULIMIT_OPTIONS'] = '-t 5 -v 64000'
if request.check is 'error'
env['CHKTEX_EXIT_ON_ERROR'] = 1
if request.check is 'validate'
env['CHKTEX_VALIDATE'] = 1
# apply a series of file modifications/creations for draft mode and tikz
async.series [injectDraftModeIfRequired, createTikzFileIfRequired], (error) ->
return callback(error) if error?
timer = new Metrics.Timer("run-compile")
# find the image tag to log it as a metric, e.g. 2015.1 (convert . to - for graphite)
tag = request.imageName?.match(/:(.*)/)?[1]?.replace(/\./g,'-') or "default"
tag = "other" if not request.project_id.match(/^[0-9a-f]{24}$/) # exclude smoke test
Metrics.inc("compiles")
Metrics.inc("compiles-with-image.#{tag}")
compileName = getCompileName(request.project_id, request.user_id)
LatexRunner.runLatex compileName, {
directory: compileDir
mainFile: request.rootResourcePath
compiler: request.compiler
timeout: request.timeout
image: request.imageName
environment: env
}, (error, output, stats, timings) ->
# request was for validation only
if request.check is "validate"
result = if error?.code then "fail" else "pass"
error = new Error("validation")
error.validate = result
# request was for compile, and failed on validation
if request.check is "error" and error?.message is 'exited'
error = new Error("compilation")
error.validate = "fail"
# compile was killed by user, was a validation, or a compile which failed validation
if error?.terminated or error?.validate
OutputFileFinder.findOutputFiles resourceList, compileDir, (err, outputFiles) ->
return callback(err) if err?
callback(error, outputFiles) # return output files so user can check logs
return
# compile completed normally
return callback(error) if error?
Metrics.inc("compiles-succeeded")
for metric_key, metric_value of stats or {}
Metrics.count(metric_key, metric_value)
for metric_key, metric_value of timings or {}
Metrics.timing(metric_key, metric_value)
loadavg = os.loadavg?()
Metrics.gauge("load-avg", loadavg[0]) if loadavg?
ts = timer.done()
logger.log {project_id: request.project_id, user_id: request.user_id, time_taken: ts, stats:stats, timings:timings, loadavg:loadavg}, "done compile"
if stats?["latex-runs"] > 0
Metrics.timing("run-compile-per-pass", ts / stats["latex-runs"])
if stats?["latex-runs"] > 0 and timings?["cpu-time"] > 0
Metrics.timing("run-compile-cpu-time-per-pass", timings["cpu-time"] / stats["latex-runs"])
OutputFileFinder.findOutputFiles resourceList, compileDir, (error, outputFiles) ->
return callback(error) if error?
OutputCacheManager.saveOutputFiles outputFiles, compileDir, (error, newOutputFiles) ->
callback null, newOutputFiles
stopCompile: (project_id, user_id, callback = (error) ->) ->
compileName = getCompileName(project_id, user_id)
LatexRunner.killLatex compileName, callback
clearProject: (project_id, user_id, _callback = (error) ->) ->
callback = (error) ->
_callback(error)
_callback = () ->
compileDir = getCompileDir(project_id, user_id)
CompileManager._checkDirectory compileDir, (err, exists) ->
return callback(err) if err?
return callback() if not exists # skip removal if no directory present
proc = child_process.spawn "rm", ["-r", compileDir]
proc.on "error", callback
stderr = ""
proc.stderr.on "data", (chunk) -> stderr += chunk.toString()
proc.on "close", (code) ->
if code == 0
return callback(null)
else
return callback(new Error("rm -r #{compileDir} failed: #{stderr}"))
_findAllDirs: (callback = (error, allDirs) ->) ->
root = Settings.path.compilesDir
fs.readdir root, (err, files) ->
return callback(err) if err?
allDirs = (Path.join(root, file) for file in files)
callback(null, allDirs)
clearExpiredProjects: (max_cache_age_ms, callback = (error) ->) ->
now = Date.now()
# action for each directory
expireIfNeeded = (checkDir, cb) ->
fs.stat checkDir, (err, stats) ->
return cb() if err? # ignore errors checking directory
age = now - stats.mtime
hasExpired = (age > max_cache_age_ms)
if hasExpired then fse.remove(checkDir, cb) else cb()
# iterate over all project directories
CompileManager._findAllDirs (error, allDirs) ->
return callback() if error?
async.eachSeries allDirs, expireIfNeeded, callback
_checkDirectory: (compileDir, callback = (error, exists) ->) ->
fs.lstat compileDir, (err, stats) ->
if err?.code is 'ENOENT'
return callback(null, false) # directory does not exist
else if err?
logger.err {dir: compileDir, err:err}, "error on stat of project directory for removal"
return callback(err)
else if not stats?.isDirectory()
logger.err {dir: compileDir, stats:stats}, "bad project directory for removal"
return callback new Error("project directory is not directory")
else
callback(null, true) # directory exists
syncFromCode: (project_id, user_id, file_name, line, column, callback = (error, pdfPositions) ->) ->
# If LaTeX was run in a virtual environment, the file path that synctex expects
# might not match the file path on the host. The .synctex.gz file however, will be accessed
# wherever it is on the host.
compileName = getCompileName(project_id, user_id)
base_dir = Settings.path.synctexBaseDir(compileName)
file_path = base_dir + "/" + file_name
compileDir = getCompileDir(project_id, user_id)
synctex_path = "#{base_dir}/output.pdf"
command = ["code", synctex_path, file_path, line, column]
CompileManager._runSynctex project_id, user_id, command, (error, stdout) ->
return callback(error) if error?
if stdout.toLowerCase().indexOf("warning") == -1
logType = "log"
else
logType = "err"
logger[logType] project_id: project_id, user_id:user_id, file_name: file_name, line: line, column: column, command:command, stdout: stdout, "synctex code output"
callback null, CompileManager._parseSynctexFromCodeOutput(stdout)
syncFromPdf: (project_id, user_id, page, h, v, callback = (error, filePositions) ->) ->
compileName = getCompileName(project_id, user_id)
compileDir = getCompileDir(project_id, user_id)
base_dir = Settings.path.synctexBaseDir(compileName)
synctex_path = "#{base_dir}/output.pdf"
command = ["pdf", synctex_path, page, h, v]
CompileManager._runSynctex project_id, user_id, command, (error, stdout) ->
return callback(error) if error?
logger.log project_id: project_id, user_id:user_id, page: page, h: h, v:v, stdout: stdout, "synctex pdf output"
callback null, CompileManager._parseSynctexFromPdfOutput(stdout, base_dir)
_checkFileExists: (path, callback = (error) ->) ->
synctexDir = Path.dirname(path)
synctexFile = Path.join(synctexDir, "output.synctex.gz")
fs.stat synctexDir, (error, stats) ->
if error?.code is 'ENOENT'
return callback(new Errors.NotFoundError("called synctex with no output directory"))
return callback(error) if error?
fs.stat synctexFile, (error, stats) ->
if error?.code is 'ENOENT'
return callback(new Errors.NotFoundError("called synctex with no output file"))
return callback(error) if error?
return callback(new Error("not a file")) if not stats?.isFile()
callback()
_runSynctex: (project_id, user_id, command, callback = (error, stdout) ->) ->
seconds = 1000
command.unshift("/opt/synctex")
directory = getCompileDir(project_id, user_id)
timeout = 10 * 1000
compileName = getCompileName(project_id, user_id)
CommandRunner.run compileName, command, directory, Settings.clsi.docker.image, timeout, {}, (error, output) ->
if error?
logger.err err:error, command:command, "error running synctex"
return callback(error)
callback(null, output.stdout)
_parseSynctexFromCodeOutput: (output) ->
results = []
for line in output.split("\n")
[node, page, h, v, width, height] = line.split("\t")
if node == "NODE"
results.push {
page: parseInt(page, 10)
h: parseFloat(h)
v: parseFloat(v)
height: parseFloat(height)
width: parseFloat(width)
}
return results
_parseSynctexFromPdfOutput: (output, base_dir) ->
results = []
for line in output.split("\n")
[node, file_path, line, column] = line.split("\t")
if node == "NODE"
file = file_path.slice(base_dir.length + 1)
results.push {
file: file
line: parseInt(line, 10)
column: parseInt(column, 10)
}
return results
wordcount: (project_id, user_id, file_name, image, callback = (error, pdfPositions) ->) ->
logger.log project_id:project_id, user_id:user_id, file_name:file_name, image:image, "running wordcount"
file_path = "$COMPILE_DIR/" + file_name
command = [ "texcount", '-nocol', '-inc', file_path, "-out=" + file_path + ".wc"]
directory = getCompileDir(project_id, user_id)
timeout = 10 * 1000
compileName = getCompileName(project_id, user_id)
CommandRunner.run compileName, command, directory, image, timeout, {}, (error) ->
return callback(error) if error?
fs.readFile directory + "/" + file_name + ".wc", "utf-8", (err, stdout) ->
if err?
logger.err err:err, command:command, directory:directory, project_id:project_id, user_id:user_id, "error reading word count output"
return callback(err)
results = CompileManager._parseWordcountFromOutput(stdout)
logger.log project_id:project_id, user_id:user_id, wordcount: results, "word count results"
callback null, results
_parseWordcountFromOutput: (output) ->
results = {
encode: ""
textWords: 0
headWords: 0
outside: 0
headers: 0
elements: 0
mathInline: 0
mathDisplay: 0
errors: 0
messages: ""
}
for line in output.split("\n")
[data, info] = line.split(":")
if data.indexOf("Encoding") > -1
results['encode'] = info.trim()
if data.indexOf("in text") > -1
results['textWords'] = parseInt(info, 10)
if data.indexOf("in head") > -1
results['headWords'] = parseInt(info, 10)
if data.indexOf("outside") > -1
results['outside'] = parseInt(info, 10)
if data.indexOf("of head") > -1
results['headers'] = parseInt(info, 10)
if data.indexOf("Number of floats/tables/figures") > -1
results['elements'] = parseInt(info, 10)
if data.indexOf("Number of math inlines") > -1
results['mathInline'] = parseInt(info, 10)
if data.indexOf("Number of math displayed") > -1
results['mathDisplay'] = parseInt(info, 10)
if data is "(errors" # errors reported as (errors:123)
results['errors'] = parseInt(info, 10)
if line.indexOf("!!! ") > -1 # errors logged as !!! message !!!
results['messages'] += line + "\n"
return results