overleaf/services/clsi/app/coffee/CompileManager.coffee

246 lines
9.8 KiB
CoffeeScript
Raw Normal View History

2014-02-12 12:27:43 -05:00
ResourceWriter = require "./ResourceWriter"
LatexRunner = require "./LatexRunner"
OutputFileFinder = require "./OutputFileFinder"
OutputCacheManager = require "./OutputCacheManager"
2014-02-12 12:27:43 -05:00
Settings = require("settings-sharelatex")
Path = require "path"
logger = require "logger-sharelatex"
Metrics = require "./Metrics"
child_process = require "child_process"
DraftModeManager = require "./DraftModeManager"
2015-09-08 09:19:46 -04:00
fs = require("fs")
fse = require "fs-extra"
os = require("os")
async = require "async"
2014-02-12 12:27:43 -05:00
2016-05-24 09:08:39 -04:00
# Choose which command runner module to load: the module path given by
# Settings.clsi.commandRunner when that setting is set (truthy), otherwise
# the local "./CommandRunner". The chosen path is logged before it is required.
commandRunner = Settings.clsi?.commandRunner or "./CommandRunner"
logger.info commandRunner:commandRunner, "selecting command runner for clsi"
CommandRunner = require(commandRunner)
2016-05-27 10:29:26 -04:00
# Build the name used to identify a compile: "<project_id>-<user_id>" when a
# user_id is given (not null/undefined), otherwise the project_id unchanged.
getCompileName = (project_id, user_id) ->
  return project_id unless user_id?
  "#{project_id}-#{user_id}"
# Path of the compile directory for a project (and optional user): the compile
# name joined onto Settings.path.compilesDir.
getCompileDir = (project_id, user_id) ->
  compileName = getCompileName(project_id, user_id)
  Path.join(Settings.path.compilesDir, compileName)
2014-02-12 12:27:43 -05:00
module.exports = CompileManager =
doCompile: (request, callback = (error, outputFiles) ->) ->
  # Run a full compile for `request`:
  #   1. write the request's resources into the compile directory,
  #   2. inject draft mode into the root resource when request.draft is set,
  #   3. run LaTeX and record metrics about the run,
  #   4. find the output files and pass them through the output cache.
  # Calls back with (error) as soon as a step fails, otherwise with
  # (null, outputFiles) where outputFiles comes from OutputCacheManager.
  compileDir = getCompileDir(request.project_id, request.user_id)
  timer = new Metrics.Timer("write-to-disk")
  logger.log project_id: request.project_id, user_id: request.user_id, "starting compile"

  # Step 2: only touch the root resource when a draft compile was requested.
  prepareDraft = (next) ->
    return next() unless request.draft
    DraftModeManager.injectDraftMode Path.join(compileDir, request.rootResourcePath), next

  # Report counters, timings and load average for a successful LaTeX run.
  recordMetrics = (stats, timings) ->
    Metrics.inc("compiles-succeeded")
    Metrics.count(key, value) for key, value of stats or {}
    Metrics.timing(key, value) for key, value of timings or {}
    loadavg = os.loadavg?()
    Metrics.gauge("load-avg", loadavg[0]) if loadavg?
    elapsed = timer.done()
    logger.log {project_id: request.project_id, user_id: request.user_id, time_taken: elapsed, stats:stats, timings:timings, loadavg:loadavg}, "done compile"
    runs = stats?["latex-runs"]
    if runs > 0
      Metrics.timing("run-compile-per-pass", elapsed / runs)
      if timings?["cpu-time"] > 0
        Metrics.timing("run-compile-cpu-time-per-pass", timings["cpu-time"] / runs)
    return

  # Step 4: locate the output files, then hand them to the output cache.
  collectOutput = ->
    OutputFileFinder.findOutputFiles request.resources, compileDir, (findError, outputFiles) ->
      return callback(findError) if findError?
      # An error reported by saveOutputFiles is not passed on to the caller.
      OutputCacheManager.saveOutputFiles outputFiles, compileDir, (saveError, newOutputFiles) ->
        callback null, newOutputFiles

  # Step 3: run LaTeX; `timer` is re-used to time the compile itself.
  runCompile = ->
    timer = new Metrics.Timer("run-compile")
    # find the image tag to log it as a metric, e.g. 2015.1 (convert . to - for graphite)
    imageTag = request.imageName?.match(/:(.*)/)?[1]?.replace(/\./g,'-') or "default"
    imageTag = "other" unless request.project_id.match(/^[0-9a-f]{24}$/) # exclude smoke test
    Metrics.inc("compiles")
    Metrics.inc("compiles-with-image.#{imageTag}")
    compileName = getCompileName(request.project_id, request.user_id)
    latexOptions =
      directory: compileDir
      mainFile: request.rootResourcePath
      compiler: request.compiler
      timeout: request.timeout
      image: request.imageName
    LatexRunner.runLatex compileName, latexOptions, (error, output, stats, timings) ->
      return callback(error) if error?
      recordMetrics(stats, timings)
      collectOutput()

  # Step 1: sync resources to disk, then chain the remaining steps.
  ResourceWriter.syncResourcesToDisk request.project_id, request.resources, compileDir, (error) ->
    if error?
      logger.err err:error, project_id: request.project_id, user_id: request.user_id, "error writing resources to disk"
      return callback(error)
    logger.log project_id: request.project_id, user_id: request.user_id, time_taken: Date.now() - timer.start, "written files to disk"
    timer.done()
    prepareDraft (error) ->
      return callback(error) if error?
      runCompile()
2014-02-12 12:27:43 -05:00
clearProject: (project_id, user_id, _callback = (error) ->) ->
  # Delete the compile directory of a project (and optional user) with `rm -r`.
  # Calls back without an error when the directory does not exist or the
  # removal exits with code 0; otherwise with an Error carrying rm's stderr.
  #
  # `callback` is attached to both the "error" and the "close" event of the
  # child process, so it replaces _callback with a no-op after the first call
  # to make sure the caller is only notified once.
  callback = (error) ->
    _callback(error)
    _callback = () ->
  compileDir = getCompileDir(project_id, user_id)
  CompileManager._checkDirectory compileDir, (checkError, exists) ->
    return callback(checkError) if checkError?
    return callback() unless exists # skip removal if no directory present
    rmProcess = child_process.spawn "rm", ["-r", compileDir]
    rmProcess.on "error", callback
    errorOutput = ""
    rmProcess.stderr.on "data", (chunk) -> errorOutput += chunk.toString()
    rmProcess.on "close", (exitCode) ->
      return callback(null) if exitCode is 0
      callback(new Error("rm -r #{compileDir} failed: #{errorOutput}"))
_findAllDirs: (callback = (error, allDirs) ->) ->
  # List every entry of Settings.path.compilesDir and call back with each
  # entry name joined onto that directory. A readdir error is passed through.
  compilesRoot = Settings.path.compilesDir
  fs.readdir compilesRoot, (readError, entries) ->
    return callback(readError) if readError?
    callback(null, entries.map((entry) -> Path.join(compilesRoot, entry)))
clearExpiredProjects: (max_cache_age_ms, callback = (error) ->) ->
  # Remove every entry under the compiles directory whose mtime is more than
  # max_cache_age_ms milliseconds older than the moment this method was called.
  # Entries are processed one at a time. If the directory listing fails the
  # callback is invoked without an error.
  startedAt = Date.now()

  # action for each directory
  removeIfStale = (dirPath, next) ->
    fs.stat dirPath, (statError, stats) ->
      return next() if statError? # ignore errors checking directory
      return next() unless (startedAt - stats.mtime) > max_cache_age_ms
      fse.remove(dirPath, next)

  # iterate over all project directories
  CompileManager._findAllDirs (listError, dirs) ->
    return callback() if listError?
    async.eachSeries dirs, removeIfStale, callback
_checkDirectory: (compileDir, callback = (error, exists) ->) ->
  # lstat compileDir and report whether it is a directory that can be removed:
  #   (null, false)  - the path does not exist (ENOENT)
  #   (null, true)   - the path exists and is a directory
  #   (error)        - lstat failed for another reason, or the path exists
  #                    but is not a directory
  fs.lstat compileDir, (lstatError, stats) ->
    if lstatError?
      return callback(null, false) if lstatError.code is 'ENOENT' # directory does not exist
      logger.err {dir: compileDir, err:lstatError}, "error on stat of project directory for removal"
      return callback(lstatError)
    unless stats?.isDirectory()
      logger.err {dir: compileDir, stats:stats}, "bad project directory for removal"
      return callback new Error("project directory is not directory")
    callback(null, true) # directory exists
2014-04-08 10:18:56 -04:00
2016-05-27 10:29:26 -04:00
syncFromCode: (project_id, user_id, file_name, line, column, callback = (error, pdfPositions) ->) ->
  # Map a source position (file_name, line, column) to positions in the PDF by
  # running synctex in "code" mode, and call back with the parsed positions.
  #
  # If LaTeX was run in a virtual environment, the file path that synctex expects
  # might not match the file path on the host. The .synctex.gz file however, will be accessed
  # wherever it is on the host.
  synctexBase = Settings.path.synctexBaseDir(getCompileName(project_id, user_id))
  sourcePath = "#{synctexBase}/#{file_name}"
  pdfPath = Path.join(getCompileDir(project_id, user_id), "output.pdf")
  synctexArgs = ["code", pdfPath, sourcePath, line, column]
  CompileManager._runSynctex synctexArgs, (error, stdout) ->
    return callback(error) if error?
    logger.log project_id: project_id, user_id:user_id, file_name: file_name, line: line, column: column, stdout: stdout, "synctex code output"
    callback null, CompileManager._parseSynctexFromCodeOutput(stdout)
2016-05-27 10:29:26 -04:00
syncFromPdf: (project_id, user_id, page, h, v, callback = (error, filePositions) ->) ->
  # Map a PDF position (page, h, v) back to source positions by running
  # synctex in "pdf" mode. The synctex base directory is passed to the parser
  # so it can be cut off the front of the file paths that synctex reports.
  synctexBase = Settings.path.synctexBaseDir(getCompileName(project_id, user_id))
  pdfPath = Path.join(getCompileDir(project_id, user_id), "output.pdf")
  synctexArgs = ["pdf", pdfPath, page, h, v]
  CompileManager._runSynctex synctexArgs, (error, stdout) ->
    return callback(error) if error?
    logger.log project_id: project_id, user_id:user_id, page: page, h: h, v:v, stdout: stdout, "synctex pdf output"
    callback null, CompileManager._parseSynctexFromPdfOutput(stdout, synctexBase)
_runSynctex: (args, callback = (error, stdout) ->) ->
  # Execute the bundled synctex binary (../../bin/synctex relative to this
  # file) with `args`, limited to 10 seconds. Calls back with (error) when
  # execFile reports one, otherwise with (null, stdout).
  synctexBinary = Path.resolve(__dirname + "/../../bin/synctex")
  execOptions = timeout: 10 * 1000 # milliseconds
  child_process.execFile synctexBinary, args, execOptions, (error, stdout, stderr) ->
    unless error?
      return callback(null, stdout)
    logger.err err:error, args:args, "error running synctex"
    callback(error)
2014-04-08 10:18:56 -04:00
_parseSynctexFromCodeOutput: (output) ->
  # Parse synctex "code" output. Each line is split on tabs; lines whose first
  # field is "NODE" are read as NODE, page, h, v, width, height and turned into
  # {page, h, v, height, width} with numeric values. Other lines are ignored.
  positions = []
  for row in output.split("\n")
    fields = row.split("\t")
    continue unless fields[0] is "NODE"
    positions.push
      page: parseInt(fields[1], 10)
      h: parseFloat(fields[2])
      v: parseFloat(fields[3])
      height: parseFloat(fields[5])
      width: parseFloat(fields[4])
  return positions
_parseSynctexFromPdfOutput: (output, base_dir) ->
  # Parse synctex "pdf" output. Each line is split on tabs; lines whose first
  # field is "NODE" are read as NODE, file path, line, column. The first
  # base_dir.length + 1 characters are cut off the file path (base_dir plus the
  # separator that follows it). Other lines are ignored.
  positions = []
  for row in output.split("\n")
    [node, filePath, lineNumber, columnNumber] = row.split("\t")
    continue unless node is "NODE"
    positions.push
      file: filePath.slice(base_dir.length + 1)
      line: parseInt(lineNumber, 10)
      column: parseInt(columnNumber, 10)
  return positions
2016-05-27 10:29:26 -04:00
wordcount: (project_id, user_id, file_name, image, callback = (error, pdfPositions) ->) ->
  # Count the words of file_name by running `texcount -inc` through the
  # command runner, writing the report to "<file_name>.wc" in the compile
  # directory, then reading and parsing that report.
  # Calls back with (error) when the command or the read fails, otherwise with
  # (null, results) where results is produced by _parseWordcountFromOutput.
  logger.log project_id:project_id, user_id:user_id, file_name:file_name, image:image, "running wordcount"
  # "$COMPILE_DIR" is passed through literally in the command arguments.
  file_path = "$COMPILE_DIR/" + file_name
  command = [ "texcount", '-inc', file_path, "-out=" + file_path + ".wc"]
  directory = getCompileDir(project_id, user_id)
  timeout = 10 * 1000 # milliseconds
  compileName = getCompileName(project_id, user_id)
  CommandRunner.run compileName, command, directory, image, timeout, (error) ->
    return callback(error) if error?
    # Read the report asynchronously so the event loop is not blocked while
    # other requests are being served.
    fs.readFile directory + "/" + file_name + ".wc", "utf-8", (err, stdout) ->
      if err?
        logger.err err:err, command:command, directory:directory, project_id:project_id, user_id:user_id, "error reading word count output"
        return callback(err)
      callback null, CompileManager._parseWordcountFromOutput(stdout)
2015-06-08 17:35:24 -04:00
_parseWordcountFromOutput: (output) ->
  # Parse a texcount report into a results object. Each line is split on ":";
  # the part before the first colon selects the field and the part after it
  # holds the value. When several lines match the same field the last one wins.
  # Lines without a ":" carry no value and are skipped, so they can neither
  # raise a TypeError nor overwrite a counter with NaN.
  results = {
    encode: ""
    textWords: 0
    headWords: 0
    outside: 0
    headers: 0
    elements: 0
    mathInline: 0
    mathDisplay: 0
  }
  # [text looked for in the label, results key holding the integer value]
  counters = [
    ["in text", "textWords"]
    ["in head", "headWords"]
    ["outside", "outside"]
    ["of head", "headers"]
    ["Number of floats/tables/figures", "elements"]
    ["Number of math inlines", "mathInline"]
    ["Number of math displayed", "mathDisplay"]
  ]
  for line in output.split("\n")
    [data, info] = line.split(":")
    continue unless info? # no colon on this line, nothing to read
    if data.indexOf("Encoding") > -1
      results['encode'] = info.trim()
    for [marker, key] in counters
      if data.indexOf(marker) > -1
        results[key] = parseInt(info, 10)
  return results