mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-21 20:47:08 -05:00
191 lines
7.4 KiB
CoffeeScript
191 lines
7.4 KiB
CoffeeScript
async = require "async"
|
|
fs = require "fs"
|
|
fse = require "fs-extra"
|
|
Path = require "path"
|
|
logger = require "logger-sharelatex"
|
|
_ = require "underscore"
|
|
Settings = require "settings-sharelatex"
|
|
crypto = require "crypto"
|
|
|
|
OutputFileOptimiser = require "./OutputFileOptimiser"
|
|
|
|
module.exports = OutputCacheManager =
|
|
CACHE_SUBDIR: '.cache/clsi'
|
|
ARCHIVE_SUBDIR: '.archive/clsi'
|
|
# build id is HEXDATE-HEXRANDOM from Date.now()and RandomBytes
|
|
# for backwards compatibility, make the randombytes part optional
|
|
BUILD_REGEX: /^[0-9a-f]+(-[0-9a-f]+)?$/
|
|
CACHE_LIMIT: 2 # maximum number of cache directories
|
|
CACHE_AGE: 60*60*1000 # up to one hour old
|
|
|
|
path: (buildId, file) ->
|
|
# used by static server, given build id return '.cache/clsi/buildId'
|
|
if buildId.match OutputCacheManager.BUILD_REGEX
|
|
return Path.join(OutputCacheManager.CACHE_SUBDIR, buildId, file)
|
|
else
|
|
# for invalid build id, return top level
|
|
return file
|
|
|
|
generateBuildId: (callback = (error, buildId) ->) ->
|
|
# generate a secure build id from Date.now() and 8 random bytes in hex
|
|
crypto.randomBytes 8, (err, buf) ->
|
|
return callback(err) if err?
|
|
random = buf.toString('hex')
|
|
date = Date.now().toString(16)
|
|
callback err, "#{date}-#{random}"
|
|
|
|
saveOutputFiles: (outputFiles, compileDir, callback = (error) ->) ->
|
|
OutputCacheManager.generateBuildId (err, buildId) ->
|
|
return callback(err) if err?
|
|
OutputCacheManager.saveOutputFilesInBuildDir outputFiles, compileDir, buildId, callback
|
|
|
|
saveOutputFilesInBuildDir: (outputFiles, compileDir, buildId, callback = (error) ->) ->
|
|
# make a compileDir/CACHE_SUBDIR/build_id directory and
|
|
# copy all the output files into it
|
|
cacheRoot = Path.join(compileDir, OutputCacheManager.CACHE_SUBDIR)
|
|
# Put the files into a new cache subdirectory
|
|
cacheDir = Path.join(compileDir, OutputCacheManager.CACHE_SUBDIR, buildId)
|
|
# Is it a per-user compile? check if compile directory is PROJECTID-USERID
|
|
perUser = Path.basename(compileDir).match(/^[0-9a-f]{24}-[0-9a-f]{24}$/)
|
|
|
|
# Archive logs in background
|
|
if Settings.clsi?.archive_logs or Settings.clsi?.strace
|
|
OutputCacheManager.archiveLogs outputFiles, compileDir, buildId, (err) ->
|
|
if err?
|
|
logger.warn err:err, "erroring archiving log files"
|
|
|
|
# make the new cache directory
|
|
fse.ensureDir cacheDir, (err) ->
|
|
if err?
|
|
logger.error err: err, directory: cacheDir, "error creating cache directory"
|
|
callback(err, outputFiles)
|
|
else
|
|
# copy all the output files into the new cache directory
|
|
results = []
|
|
async.mapSeries outputFiles, (file, cb) ->
|
|
newFile = _.clone(file)
|
|
[src, dst] = [Path.join(compileDir, file.path), Path.join(cacheDir, file.path)]
|
|
OutputCacheManager._checkFileIsSafe src, (err, isSafe) ->
|
|
return cb(err) if err?
|
|
if !isSafe
|
|
return cb()
|
|
OutputCacheManager._checkIfShouldCopy src, (err, shouldCopy) ->
|
|
return cb(err) if err?
|
|
if !shouldCopy
|
|
return cb()
|
|
OutputCacheManager._copyFile src, dst, (err) ->
|
|
return cb(err) if err?
|
|
newFile.build = buildId # attach a build id if we cached the file
|
|
results.push newFile
|
|
cb()
|
|
, (err) ->
|
|
if err?
|
|
# pass back the original files if we encountered *any* error
|
|
callback(err, outputFiles)
|
|
# clean up the directory we just created
|
|
fse.remove cacheDir, (err) ->
|
|
if err?
|
|
logger.error err: err, dir: cacheDir, "error removing cache dir after failure"
|
|
else
|
|
# pass back the list of new files in the cache
|
|
callback(err, results)
|
|
# let file expiry run in the background, expire all previous files if per-user
|
|
OutputCacheManager.expireOutputFiles cacheRoot, {keep: buildId, limit: if perUser then 1 else null}
|
|
|
|
archiveLogs: (outputFiles, compileDir, buildId, callback = (error) ->) ->
|
|
archiveDir = Path.join(compileDir, OutputCacheManager.ARCHIVE_SUBDIR, buildId)
|
|
logger.log {dir: archiveDir}, "archiving log files for project"
|
|
fse.ensureDir archiveDir, (err) ->
|
|
return callback(err) if err?
|
|
async.mapSeries outputFiles, (file, cb) ->
|
|
[src, dst] = [Path.join(compileDir, file.path), Path.join(archiveDir, file.path)]
|
|
OutputCacheManager._checkFileIsSafe src, (err, isSafe) ->
|
|
return cb(err) if err?
|
|
return cb() if !isSafe
|
|
OutputCacheManager._checkIfShouldArchive src, (err, shouldArchive) ->
|
|
return cb(err) if err?
|
|
return cb() if !shouldArchive
|
|
OutputCacheManager._copyFile src, dst, cb
|
|
, callback
|
|
|
|
expireOutputFiles: (cacheRoot, options, callback = (error) ->) ->
|
|
# look in compileDir for build dirs and delete if > N or age of mod time > T
|
|
fs.readdir cacheRoot, (err, results) ->
|
|
if err?
|
|
return callback(null) if err.code == 'ENOENT' # cache directory is empty
|
|
logger.error err: err, project_id: cacheRoot, "error clearing cache"
|
|
return callback(err)
|
|
|
|
dirs = results.sort().reverse()
|
|
currentTime = Date.now()
|
|
|
|
isExpired = (dir, index) ->
|
|
return false if options?.keep == dir
|
|
# remove any directories over the requested (non-null) limit
|
|
return true if options?.limit? and index > options.limit
|
|
# remove any directories over the hard limit
|
|
return true if index > OutputCacheManager.CACHE_LIMIT
|
|
# we can get the build time from the first part of the directory name DDDD-RRRR
|
|
# DDDD is date and RRRR is random bytes
|
|
dirTime = parseInt(dir.split('-')?[0], 16)
|
|
age = currentTime - dirTime
|
|
return age > OutputCacheManager.CACHE_AGE
|
|
|
|
toRemove = _.filter(dirs, isExpired)
|
|
|
|
removeDir = (dir, cb) ->
|
|
fse.remove Path.join(cacheRoot, dir), (err, result) ->
|
|
logger.log cache: cacheRoot, dir: dir, "removed expired cache dir"
|
|
if err?
|
|
logger.error err: err, dir: dir, "cache remove error"
|
|
cb(err, result)
|
|
|
|
async.eachSeries toRemove, (dir, cb) ->
|
|
removeDir dir, cb
|
|
, callback
|
|
|
|
_checkFileIsSafe: (src, callback = (error, isSafe) ->) ->
|
|
# check if we have a valid file to copy into the cache
|
|
fs.stat src, (err, stats) ->
|
|
if err?.code is 'ENOENT'
|
|
logger.warn err: err, file: src, "file has disappeared before copying to build cache"
|
|
callback(err, false)
|
|
else if err?
|
|
# some other problem reading the file
|
|
logger.error err: err, file: src, "stat error for file in cache"
|
|
callback(err, false)
|
|
else if not stats.isFile()
|
|
# other filetype - reject it
|
|
logger.warn src: src, stat: stats, "nonfile output - refusing to copy to cache"
|
|
callback(null, false)
|
|
else
|
|
# it's a plain file, ok to copy
|
|
callback(null, true)
|
|
|
|
_copyFile: (src, dst, callback) ->
|
|
# copy output file into the cache
|
|
fse.copy src, dst, (err) ->
|
|
if err?.code is 'ENOENT'
|
|
logger.warn err: err, file: src, "file has disappeared when copying to build cache"
|
|
callback(err, false)
|
|
else if err?
|
|
logger.error err: err, src: src, dst: dst, "copy error for file in cache"
|
|
callback(err)
|
|
else
|
|
if Settings.clsi?.optimiseInDocker
|
|
# don't run any optimisations on the pdf when they are done
|
|
# in the docker container
|
|
callback()
|
|
else
|
|
# call the optimiser for the file too
|
|
OutputFileOptimiser.optimiseFile src, dst, callback
|
|
|
|
_checkIfShouldCopy: (src, callback = (err, shouldCopy) ->) ->
|
|
return callback(null, !Path.basename(src).match(/^strace/))
|
|
|
|
_checkIfShouldArchive: (src, callback = (err, shouldCopy) ->) ->
|
|
if Path.basename(src).match(/^strace/)
|
|
return callback(null, true)
|
|
if Settings.clsi?.archive_logs and Path.basename(src) in ["output.log", "output.blg"]
|
|
return callback(null, true)
|
|
return callback(null, false)
|