mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-07 20:31:06 -05:00
Merge pull request #16 from sharelatex/cache-output-files
Keep output files in cache
This commit is contained in:
commit
416d7a17a7
13 changed files with 293 additions and 84 deletions
|
@ -12,6 +12,7 @@ Metrics.initialize("clsi")
|
|||
Metrics.open_sockets.monitor(logger)
|
||||
|
||||
ProjectPersistenceManager = require "./app/js/ProjectPersistenceManager"
|
||||
OutputCacheManager = require "./app/js/OutputCacheManager"
|
||||
|
||||
require("./app/js/db").sync()
|
||||
|
||||
|
@ -36,7 +37,12 @@ app.delete "/project/:project_id", CompileController.clearCache
|
|||
app.get "/project/:project_id/sync/code", CompileController.syncFromCode
|
||||
app.get "/project/:project_id/sync/pdf", CompileController.syncFromPdf
|
||||
|
||||
staticServer = express.static Settings.path.compilesDir, setHeaders: (res, path, stat) ->
|
||||
ForbidSymlinks = require "./app/js/StaticServerForbidSymlinks"
|
||||
|
||||
# create a static server which does not allow access to any symlinks
|
||||
# avoids possible mismatch of root directory between middleware check
|
||||
# and serving the files
|
||||
staticServer = ForbidSymlinks express.static, Settings.path.compilesDir, setHeaders: (res, path, stat) ->
|
||||
if Path.basename(path) == "output.pdf"
|
||||
res.set("Content-Type", "application/pdf")
|
||||
# Calculate an etag in the same way as nginx
|
||||
|
@ -50,8 +56,12 @@ staticServer = express.static Settings.path.compilesDir, setHeaders: (res, path,
|
|||
# that could be used in same-origin/XSS attacks.
|
||||
res.set("Content-Type", "text/plain")
|
||||
|
||||
app.get "/project/:project_id/output/*", require("./app/js/SymlinkCheckerMiddlewear"), (req, res, next) ->
|
||||
req.url = "/#{req.params.project_id}/#{req.params[0]}"
|
||||
app.get "/project/:project_id/output/*", (req, res, next) ->
|
||||
if req.query?.build? && req.query.build.match(OutputCacheManager.BUILD_REGEX)
|
||||
# for specific build get the path from the OutputCacheManager (e.g. .clsi/buildId)
|
||||
req.url = "/#{req.params.project_id}/" + OutputCacheManager.path(req.query.build, "/#{req.params[0]}")
|
||||
else
|
||||
req.url = "/#{req.params.project_id}/#{req.params[0]}"
|
||||
staticServer(req, res, next)
|
||||
|
||||
app.get "/status", (req, res, next) ->
|
||||
|
|
|
@ -35,6 +35,7 @@ module.exports = CompileController =
|
|||
outputFiles: outputFiles.map (file) ->
|
||||
url: "#{Settings.apis.clsi.url}/project/#{request.project_id}/output/#{file.path}"
|
||||
type: file.type
|
||||
build: file.build
|
||||
}
|
||||
|
||||
clearCache: (req, res, next = (error) ->) ->
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
ResourceWriter = require "./ResourceWriter"
|
||||
LatexRunner = require "./LatexRunner"
|
||||
OutputFileFinder = require "./OutputFileFinder"
|
||||
OutputCacheManager = require "./OutputCacheManager"
|
||||
Settings = require("settings-sharelatex")
|
||||
Path = require "path"
|
||||
logger = require "logger-sharelatex"
|
||||
|
@ -32,7 +33,8 @@ module.exports = CompileManager =
|
|||
|
||||
OutputFileFinder.findOutputFiles request.resources, compileDir, (error, outputFiles) ->
|
||||
return callback(error) if error?
|
||||
callback null, outputFiles
|
||||
OutputCacheManager.saveOutputFiles outputFiles, compileDir, (error, newOutputFiles) ->
|
||||
callback null, newOutputFiles
|
||||
|
||||
clearProject: (project_id, _callback = (error) ->) ->
|
||||
callback = (error) ->
|
||||
|
|
113
services/clsi/app/coffee/OutputCacheManager.coffee
Normal file
113
services/clsi/app/coffee/OutputCacheManager.coffee
Normal file
|
@ -0,0 +1,113 @@
|
|||
async = require "async"
|
||||
fs = require "fs"
|
||||
fse = require "fs-extra"
|
||||
Path = require "path"
|
||||
logger = require "logger-sharelatex"
|
||||
_ = require "underscore"
|
||||
|
||||
OutputFileOptimiser = require "./OutputFileOptimiser"
|
||||
|
||||
module.exports = OutputCacheManager =
|
||||
CACHE_SUBDIR: '.cache/clsi'
|
||||
BUILD_REGEX: /^[0-9a-f]+$/ # build id is Date.now() converted to hex
|
||||
CACHE_LIMIT: 2 # maximum number of cache directories
|
||||
CACHE_AGE: 60*60*1000 # up to one hour old
|
||||
|
||||
path: (buildId, file) ->
|
||||
# used by static server, given build id return '.cache/clsi/buildId'
|
||||
if buildId.match OutputCacheManager.BUILD_REGEX
|
||||
return Path.join(OutputCacheManager.CACHE_SUBDIR, buildId, file)
|
||||
else
|
||||
# for invalid build id, return top level
|
||||
return file
|
||||
|
||||
saveOutputFiles: (outputFiles, compileDir, callback = (error) ->) ->
|
||||
# make a compileDir/CACHE_SUBDIR/build_id directory and
|
||||
# copy all the output files into it
|
||||
cacheRoot = Path.join(compileDir, OutputCacheManager.CACHE_SUBDIR)
|
||||
# Put the files into a new cache subdirectory
|
||||
buildId = Date.now().toString(16)
|
||||
cacheDir = Path.join(compileDir, OutputCacheManager.CACHE_SUBDIR, buildId)
|
||||
# let file expiry run in the background
|
||||
OutputCacheManager.expireOutputFiles cacheRoot, {keep: buildId}
|
||||
|
||||
checkFile = (src, callback) ->
|
||||
# check if we have a valid file to copy into the cache
|
||||
fs.stat src, (err, stats) ->
|
||||
if err?
|
||||
# some problem reading the file
|
||||
logger.error err: err, file: src, "stat error for file in cache"
|
||||
callback(err)
|
||||
else if not stats.isFile()
|
||||
# other filetype - reject it
|
||||
logger.error err: err, src: src, dst: dst, stat: stats, "nonfile output - refusing to copy to cache"
|
||||
callback(new Error("output file is not a file"), file)
|
||||
else
|
||||
# it's a plain file, ok to copy
|
||||
callback(null)
|
||||
|
||||
copyFile = (src, dst, callback) ->
|
||||
# copy output file into the cache
|
||||
fse.copy src, dst, (err) ->
|
||||
if err?
|
||||
logger.error err: err, src: src, dst: dst, "copy error for file in cache"
|
||||
callback(err)
|
||||
else
|
||||
# call the optimiser for the file too
|
||||
OutputFileOptimiser.optimiseFile src, dst, callback
|
||||
|
||||
# make the new cache directory
|
||||
fse.ensureDir cacheDir, (err) ->
|
||||
if err?
|
||||
logger.error err: err, directory: cacheDir, "error creating cache directory"
|
||||
callback(err, outputFiles)
|
||||
else
|
||||
# copy all the output files into the new cache directory
|
||||
async.mapSeries outputFiles, (file, cb) ->
|
||||
newFile = _.clone(file)
|
||||
[src, dst] = [Path.join(compileDir, file.path), Path.join(cacheDir, file.path)]
|
||||
checkFile src, (err) ->
|
||||
copyFile src, dst, (err) ->
|
||||
if not err?
|
||||
newFile.build = buildId # attach a build id if we cached the file
|
||||
cb(err, newFile)
|
||||
, (err, results) ->
|
||||
if err?
|
||||
# pass back the original files if we encountered *any* error
|
||||
callback(err, outputFiles)
|
||||
else
|
||||
# pass back the list of new files in the cache
|
||||
callback(err, results)
|
||||
|
||||
expireOutputFiles: (cacheRoot, options, callback = (error) ->) ->
|
||||
# look in compileDir for build dirs and delete if > N or age of mod time > T
|
||||
fs.readdir cacheRoot, (err, results) ->
|
||||
if err?
|
||||
return callback(null) if err.code == 'ENOENT' # cache directory is empty
|
||||
logger.error err: err, project_id: cacheRoot, "error clearing cache"
|
||||
return callback(err)
|
||||
|
||||
dirs = results.sort().reverse()
|
||||
currentTime = Date.now()
|
||||
|
||||
isExpired = (dir, index) ->
|
||||
return false if options?.keep == dir
|
||||
# remove any directories over the hard limit
|
||||
return true if index > OutputCacheManager.CACHE_LIMIT
|
||||
# we can get the build time from the directory name
|
||||
dirTime = parseInt(dir, 16)
|
||||
age = currentTime - dirTime
|
||||
return age > OutputCacheManager.CACHE_AGE
|
||||
|
||||
toRemove = _.filter(dirs, isExpired)
|
||||
|
||||
removeDir = (dir, cb) ->
|
||||
fse.remove Path.join(cacheRoot, dir), (err, result) ->
|
||||
logger.log cache: cacheRoot, dir: dir, "removed expired cache dir"
|
||||
if err?
|
||||
logger.error err: err, dir: dir, "cache remove error"
|
||||
cb(err, result)
|
||||
|
||||
async.eachSeries toRemove, (dir, cb) ->
|
||||
removeDir dir, cb
|
||||
, callback
|
|
@ -37,7 +37,7 @@ module.exports = OutputFileFinder =
|
|||
_callback(error, fileList)
|
||||
_callback = () ->
|
||||
|
||||
args = [directory, "-type", "f"]
|
||||
args = [directory, "-name", ".cache", "-prune", "-o", "-type", "f", "-print"]
|
||||
logger.log args: args, "running find command"
|
||||
|
||||
proc = spawn("find", args)
|
||||
|
|
37
services/clsi/app/coffee/OutputFileOptimiser.coffee
Normal file
37
services/clsi/app/coffee/OutputFileOptimiser.coffee
Normal file
|
@ -0,0 +1,37 @@
|
|||
fs = require "fs"
|
||||
Path = require "path"
|
||||
spawn = require("child_process").spawn
|
||||
logger = require "logger-sharelatex"
|
||||
_ = require "underscore"
|
||||
|
||||
module.exports = OutputFileOptimiser =
|
||||
|
||||
optimiseFile: (src, dst, callback = (error) ->) ->
|
||||
# check output file (src) and see if we can optimise it, storing
|
||||
# the result in the build directory (dst)
|
||||
if src.match(/\.pdf$/)
|
||||
OutputFileOptimiser.optimisePDF src, dst, callback
|
||||
else
|
||||
callback (null)
|
||||
|
||||
optimisePDF: (src, dst, callback = (error) ->) ->
|
||||
tmpOutput = dst + '.opt'
|
||||
args = ["--linearize", src, tmpOutput]
|
||||
logger.log args: args, "running qpdf command"
|
||||
|
||||
proc = spawn("qpdf", args)
|
||||
stdout = ""
|
||||
proc.stdout.on "data", (chunk) ->
|
||||
stdout += chunk.toString()
|
||||
callback = _.once(callback) # avoid double call back for error and close event
|
||||
proc.on "error", (err) ->
|
||||
logger.warn {err, args}, "qpdf failed"
|
||||
callback(null) # ignore the error
|
||||
proc.on "close", (code) ->
|
||||
if code != 0
|
||||
logger.warn {code, args}, "qpdf returned error"
|
||||
return callback(null) # ignore the error
|
||||
fs.rename tmpOutput, dst, (err) ->
|
||||
if err?
|
||||
logger.warn {tmpOutput, dst}, "failed to rename output of qpdf command"
|
||||
callback(null) # ignore the error
|
24
services/clsi/app/coffee/StaticServerForbidSymlinks.coffee
Normal file
24
services/clsi/app/coffee/StaticServerForbidSymlinks.coffee
Normal file
|
@ -0,0 +1,24 @@
|
|||
Path = require("path")
|
||||
fs = require("fs")
|
||||
Settings = require("settings-sharelatex")
|
||||
logger = require("logger-sharelatex")
|
||||
url = require "url"
|
||||
|
||||
module.exports = ForbidSymlinks = (staticFn, root, options) ->
|
||||
expressStatic = staticFn root, options
|
||||
basePath = Path.resolve(root)
|
||||
return (req, res, next) ->
|
||||
path = url.parse(req.url)?.pathname
|
||||
requestedFsPath = Path.normalize("#{basePath}/#{path}")
|
||||
fs.realpath requestedFsPath, (err, realFsPath)->
|
||||
if err?
|
||||
logger.warn err:err, requestedFsPath:requestedFsPath, realFsPath:realFsPath, path: req.params[0], project_id: req.params.project_id, "error checking file access"
|
||||
if err.code == 'ENOENT'
|
||||
return res.sendStatus(404)
|
||||
else
|
||||
return res.sendStatus(500)
|
||||
else if requestedFsPath != realFsPath
|
||||
logger.warn requestedFsPath:requestedFsPath, realFsPath:realFsPath, path: req.params[0], project_id: req.params.project_id, "trying to access a different file (symlink), aborting"
|
||||
return res.sendStatus(404)
|
||||
else
|
||||
expressStatic(req, res, next)
|
|
@ -1,17 +0,0 @@
|
|||
Path = require("path")
|
||||
fs = require("fs")
|
||||
Settings = require("settings-sharelatex")
|
||||
logger = require("logger-sharelatex")
|
||||
|
||||
|
||||
module.exports = (req, res, next)->
|
||||
basePath = Path.resolve("#{Settings.path.compilesDir}/#{req.params.project_id}")
|
||||
requestedFsPath = Path.normalize("#{basePath}/#{req.params[0]}")
|
||||
fs.realpath requestedFsPath, (err, realFsPath)->
|
||||
if err?
|
||||
return res.send(500)
|
||||
else if requestedFsPath != realFsPath
|
||||
logger.warn requestedFsPath:requestedFsPath, realFsPath:realFsPath, path: req.params[0], project_id: req.params.project_id, "trying to access a different file (symlink), aborting"
|
||||
return res.send(404)
|
||||
else
|
||||
return next()
|
|
@ -21,7 +21,9 @@
|
|||
"smoke-test-sharelatex": "git+https://github.com/sharelatex/smoke-test-sharelatex.git#v1.0.0",
|
||||
"sqlite3": "~2.2.0",
|
||||
"express": "^4.2.0",
|
||||
"body-parser": "^1.2.0"
|
||||
"body-parser": "^1.2.0",
|
||||
"fs-extra": "^0.16.3",
|
||||
"underscore": "^1.8.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"mocha": "1.10.0",
|
||||
|
|
|
@ -36,9 +36,11 @@ describe "CompileController", ->
|
|||
@output_files = [{
|
||||
path: "output.pdf"
|
||||
type: "pdf"
|
||||
build: 1234
|
||||
}, {
|
||||
path: "output.log"
|
||||
type: "log"
|
||||
build: 1234
|
||||
}]
|
||||
@RequestParser.parse = sinon.stub().callsArgWith(1, null, @request)
|
||||
@ProjectPersistenceManager.markProjectAsJustAccessed = sinon.stub().callsArg(1)
|
||||
|
@ -73,6 +75,7 @@ describe "CompileController", ->
|
|||
outputFiles: @output_files.map (file) =>
|
||||
url: "#{@Settings.apis.clsi.url}/project/#{@project_id}/output/#{file.path}"
|
||||
type: file.type
|
||||
build: file.build
|
||||
)
|
||||
.should.equal true
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@ describe "CompileManager", ->
|
|||
"./LatexRunner": @LatexRunner = {}
|
||||
"./ResourceWriter": @ResourceWriter = {}
|
||||
"./OutputFileFinder": @OutputFileFinder = {}
|
||||
"./OutputCacheManager": @OutputCacheManager = {}
|
||||
"settings-sharelatex": @Settings = { path: compilesDir: "/compiles/dir" }
|
||||
"logger-sharelatex": @logger = { log: sinon.stub() }
|
||||
"child_process": @child_process = {}
|
||||
|
@ -26,6 +27,15 @@ describe "CompileManager", ->
|
|||
path: "output.pdf"
|
||||
type: "pdf"
|
||||
}]
|
||||
@build_files = [{
|
||||
path: "output.log"
|
||||
type: "log"
|
||||
build: 1234
|
||||
}, {
|
||||
path: "output.pdf"
|
||||
type: "pdf"
|
||||
build: 1234
|
||||
}]
|
||||
@request =
|
||||
resources: @resources = "mock-resources"
|
||||
rootResourcePath: @rootResourcePath = "main.tex"
|
||||
|
@ -37,6 +47,7 @@ describe "CompileManager", ->
|
|||
@ResourceWriter.syncResourcesToDisk = sinon.stub().callsArg(3)
|
||||
@LatexRunner.runLatex = sinon.stub().callsArg(2)
|
||||
@OutputFileFinder.findOutputFiles = sinon.stub().callsArgWith(2, null, @output_files)
|
||||
@OutputCacheManager.saveOutputFiles = sinon.stub().callsArgWith(2, null, @build_files)
|
||||
@CompileManager.doCompile @request, @callback
|
||||
|
||||
it "should write the resources to disk", ->
|
||||
|
@ -60,7 +71,8 @@ describe "CompileManager", ->
|
|||
.should.equal true
|
||||
|
||||
it "should return the output files", ->
|
||||
@callback.calledWith(null, @output_files).should.equal true
|
||||
console.log 'output_files', @build_files
|
||||
@callback.calledWith(null, @build_files).should.equal true
|
||||
|
||||
describe "clearProject", ->
|
||||
describe "succesfully", ->
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
should = require('chai').should()
|
||||
SandboxedModule = require('sandboxed-module')
|
||||
assert = require('assert')
|
||||
path = require('path')
|
||||
sinon = require('sinon')
|
||||
modulePath = path.join __dirname, "../../../app/js/StaticServerForbidSymlinks"
|
||||
expect = require("chai").expect
|
||||
|
||||
describe "StaticServerForbidSymlinks", ->
|
||||
|
||||
beforeEach ->
|
||||
|
||||
@settings =
|
||||
path:
|
||||
compilesDir: "/compiles/here"
|
||||
|
||||
@fs = {}
|
||||
@ForbidSymlinks = SandboxedModule.require modulePath, requires:
|
||||
"settings-sharelatex":@settings
|
||||
"logger-sharelatex":
|
||||
log:->
|
||||
warn:->
|
||||
"fs":@fs
|
||||
|
||||
@dummyStatic = (rootDir, options) ->
|
||||
return (req, res, next) ->
|
||||
# console.log "dummyStatic serving file", rootDir, "called with", req.url
|
||||
# serve it
|
||||
next()
|
||||
|
||||
@StaticServerForbidSymlinks = @ForbidSymlinks @dummyStatic, @settings.path.compilesDir
|
||||
@req =
|
||||
params:
|
||||
project_id:"12345"
|
||||
|
||||
@res = {}
|
||||
@req.url = "/12345/output.pdf"
|
||||
|
||||
|
||||
describe "sending a normal file through", ->
|
||||
beforeEach ->
|
||||
@fs.realpath = sinon.stub().callsArgWith(1, null, "#{@settings.path.compilesDir}/#{@req.params.project_id}/output.pdf")
|
||||
|
||||
it "should call next", (done)->
|
||||
@res.sendStatus = (resCode)->
|
||||
resCode.should.equal 200
|
||||
done()
|
||||
@StaticServerForbidSymlinks @req, @res, done
|
||||
|
||||
|
||||
describe "with a missing file", ->
|
||||
beforeEach ->
|
||||
@fs.realpath = sinon.stub().callsArgWith(1, {code: 'ENOENT'}, "#{@settings.path.compilesDir}/#{@req.params.project_id}/unknown.pdf")
|
||||
|
||||
it "should send a 404", (done)->
|
||||
@res.sendStatus = (resCode)->
|
||||
resCode.should.equal 404
|
||||
done()
|
||||
@StaticServerForbidSymlinks @req, @res
|
||||
|
||||
|
||||
describe "with a symlink file", ->
|
||||
beforeEach ->
|
||||
@fs.realpath = sinon.stub().callsArgWith(1, null, "/etc/#{@req.params.project_id}/output.pdf")
|
||||
|
||||
it "should send a 404", (done)->
|
||||
@res.sendStatus = (resCode)->
|
||||
resCode.should.equal 404
|
||||
done()
|
||||
@StaticServerForbidSymlinks @req, @res
|
||||
|
||||
describe "with an error from fs.realpath", ->
|
||||
|
||||
beforeEach ->
|
||||
@fs.realpath = sinon.stub().callsArgWith(1, "error")
|
||||
|
||||
it "should send a 500", (done)->
|
||||
@res.sendStatus = (resCode)->
|
||||
resCode.should.equal 500
|
||||
done()
|
||||
@StaticServerForbidSymlinks @req, @res
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
should = require('chai').should()
|
||||
SandboxedModule = require('sandboxed-module')
|
||||
assert = require('assert')
|
||||
path = require('path')
|
||||
sinon = require('sinon')
|
||||
modulePath = path.join __dirname, "../../../app/js/SymlinkCheckerMiddlewear"
|
||||
expect = require("chai").expect
|
||||
|
||||
describe "SymlinkCheckerMiddlewear", ->
|
||||
|
||||
beforeEach ->
|
||||
|
||||
@settings =
|
||||
path:
|
||||
compilesDir: "/compiles/here"
|
||||
|
||||
@fs = {}
|
||||
@SymlinkCheckerMiddlewear = SandboxedModule.require modulePath, requires:
|
||||
"settings-sharelatex":@settings
|
||||
"logger-sharelatex":
|
||||
log:->
|
||||
warn:->
|
||||
"fs":@fs
|
||||
@req =
|
||||
params:
|
||||
project_id:"12345"
|
||||
|
||||
@res = {}
|
||||
@req.params[0]= "output.pdf"
|
||||
|
||||
|
||||
describe "sending a normal file through", ->
|
||||
beforeEach ->
|
||||
@fs.realpath = sinon.stub().callsArgWith(1, null, "#{@settings.path.compilesDir}/#{@req.params.project_id}/output.pdf")
|
||||
|
||||
it "should call next", (done)->
|
||||
@SymlinkCheckerMiddlewear @req, @res, done
|
||||
|
||||
|
||||
describe "with a symlink file", ->
|
||||
beforeEach ->
|
||||
@fs.realpath = sinon.stub().callsArgWith(1, null, "/etc/#{@req.params.project_id}/output.pdf")
|
||||
|
||||
it "should send a 404", (done)->
|
||||
@res.send = (resCode)->
|
||||
resCode.should.equal 404
|
||||
done()
|
||||
@SymlinkCheckerMiddlewear @req, @res
|
||||
|
||||
describe "with an error from fs.realpath", ->
|
||||
|
||||
beforeEach ->
|
||||
@fs.realpath = sinon.stub().callsArgWith(1, "error")
|
||||
|
||||
it "should send a 500", (done)->
|
||||
@res.send = (resCode)->
|
||||
resCode.should.equal 500
|
||||
done()
|
||||
@SymlinkCheckerMiddlewear @req, @res
|
||||
|
Loading…
Reference in a new issue