Merge pull request #16 from sharelatex/cache-output-files

Keep output files in cache
This commit is contained in:
Brian Gough 2015-03-02 12:06:06 +00:00
commit 416d7a17a7
13 changed files with 293 additions and 84 deletions

View file

@ -12,6 +12,7 @@ Metrics.initialize("clsi")
Metrics.open_sockets.monitor(logger)
ProjectPersistenceManager = require "./app/js/ProjectPersistenceManager"
OutputCacheManager = require "./app/js/OutputCacheManager"
require("./app/js/db").sync()
@ -36,7 +37,12 @@ app.delete "/project/:project_id", CompileController.clearCache
app.get "/project/:project_id/sync/code", CompileController.syncFromCode
app.get "/project/:project_id/sync/pdf", CompileController.syncFromPdf
staticServer = express.static Settings.path.compilesDir, setHeaders: (res, path, stat) ->
ForbidSymlinks = require "./app/js/StaticServerForbidSymlinks"
# create a static server which does not allow access to any symlinks
# avoids possible mismatch of root directory between middleware check
# and serving the files
staticServer = ForbidSymlinks express.static, Settings.path.compilesDir, setHeaders: (res, path, stat) ->
if Path.basename(path) == "output.pdf"
res.set("Content-Type", "application/pdf")
# Calculate an etag in the same way as nginx
@ -50,8 +56,12 @@ staticServer = express.static Settings.path.compilesDir, setHeaders: (res, path,
# that could be used in same-origin/XSS attacks.
res.set("Content-Type", "text/plain")
app.get "/project/:project_id/output/*", require("./app/js/SymlinkCheckerMiddlewear"), (req, res, next) ->
req.url = "/#{req.params.project_id}/#{req.params[0]}"
app.get "/project/:project_id/output/*", (req, res, next) ->
if req.query?.build? && req.query.build.match(OutputCacheManager.BUILD_REGEX)
# for specific build get the path from the OutputCacheManager (e.g. .clsi/buildId)
req.url = "/#{req.params.project_id}/" + OutputCacheManager.path(req.query.build, "/#{req.params[0]}")
else
req.url = "/#{req.params.project_id}/#{req.params[0]}"
staticServer(req, res, next)
app.get "/status", (req, res, next) ->

View file

@ -35,6 +35,7 @@ module.exports = CompileController =
outputFiles: outputFiles.map (file) ->
url: "#{Settings.apis.clsi.url}/project/#{request.project_id}/output/#{file.path}"
type: file.type
build: file.build
}
clearCache: (req, res, next = (error) ->) ->

View file

@ -1,6 +1,7 @@
ResourceWriter = require "./ResourceWriter"
LatexRunner = require "./LatexRunner"
OutputFileFinder = require "./OutputFileFinder"
OutputCacheManager = require "./OutputCacheManager"
Settings = require("settings-sharelatex")
Path = require "path"
logger = require "logger-sharelatex"
@ -32,7 +33,8 @@ module.exports = CompileManager =
OutputFileFinder.findOutputFiles request.resources, compileDir, (error, outputFiles) ->
return callback(error) if error?
callback null, outputFiles
OutputCacheManager.saveOutputFiles outputFiles, compileDir, (error, newOutputFiles) ->
callback null, newOutputFiles
clearProject: (project_id, _callback = (error) ->) ->
callback = (error) ->

View file

@ -0,0 +1,113 @@
async = require "async"
fs = require "fs"
fse = require "fs-extra"
Path = require "path"
logger = require "logger-sharelatex"
_ = require "underscore"
OutputFileOptimiser = require "./OutputFileOptimiser"
module.exports = OutputCacheManager =
CACHE_SUBDIR: '.cache/clsi'
BUILD_REGEX: /^[0-9a-f]+$/ # build id is Date.now() converted to hex
CACHE_LIMIT: 2 # maximum number of cache directories
CACHE_AGE: 60*60*1000 # up to one hour old
path: (buildId, file) ->
# used by static server, given build id return '.cache/clsi/buildId'
if buildId.match OutputCacheManager.BUILD_REGEX
return Path.join(OutputCacheManager.CACHE_SUBDIR, buildId, file)
else
# for invalid build id, return top level
return file
saveOutputFiles: (outputFiles, compileDir, callback = (error) ->) ->
# make a compileDir/CACHE_SUBDIR/build_id directory and
# copy all the output files into it
cacheRoot = Path.join(compileDir, OutputCacheManager.CACHE_SUBDIR)
# Put the files into a new cache subdirectory
buildId = Date.now().toString(16)
cacheDir = Path.join(compileDir, OutputCacheManager.CACHE_SUBDIR, buildId)
# let file expiry run in the background
OutputCacheManager.expireOutputFiles cacheRoot, {keep: buildId}
checkFile = (src, callback) ->
# check if we have a valid file to copy into the cache
fs.stat src, (err, stats) ->
if err?
# some problem reading the file
logger.error err: err, file: src, "stat error for file in cache"
callback(err)
else if not stats.isFile()
# other filetype - reject it
logger.error err: err, src: src, dst: dst, stat: stats, "nonfile output - refusing to copy to cache"
callback(new Error("output file is not a file"), file)
else
# it's a plain file, ok to copy
callback(null)
copyFile = (src, dst, callback) ->
# copy output file into the cache
fse.copy src, dst, (err) ->
if err?
logger.error err: err, src: src, dst: dst, "copy error for file in cache"
callback(err)
else
# call the optimiser for the file too
OutputFileOptimiser.optimiseFile src, dst, callback
# make the new cache directory
fse.ensureDir cacheDir, (err) ->
if err?
logger.error err: err, directory: cacheDir, "error creating cache directory"
callback(err, outputFiles)
else
# copy all the output files into the new cache directory
async.mapSeries outputFiles, (file, cb) ->
newFile = _.clone(file)
[src, dst] = [Path.join(compileDir, file.path), Path.join(cacheDir, file.path)]
checkFile src, (err) ->
copyFile src, dst, (err) ->
if not err?
newFile.build = buildId # attach a build id if we cached the file
cb(err, newFile)
, (err, results) ->
if err?
# pass back the original files if we encountered *any* error
callback(err, outputFiles)
else
# pass back the list of new files in the cache
callback(err, results)
expireOutputFiles: (cacheRoot, options, callback = (error) ->) ->
# look in compileDir for build dirs and delete if > N or age of mod time > T
fs.readdir cacheRoot, (err, results) ->
if err?
return callback(null) if err.code == 'ENOENT' # cache directory is empty
logger.error err: err, project_id: cacheRoot, "error clearing cache"
return callback(err)
dirs = results.sort().reverse()
currentTime = Date.now()
isExpired = (dir, index) ->
return false if options?.keep == dir
# remove any directories over the hard limit
return true if index > OutputCacheManager.CACHE_LIMIT
# we can get the build time from the directory name
dirTime = parseInt(dir, 16)
age = currentTime - dirTime
return age > OutputCacheManager.CACHE_AGE
toRemove = _.filter(dirs, isExpired)
removeDir = (dir, cb) ->
fse.remove Path.join(cacheRoot, dir), (err, result) ->
logger.log cache: cacheRoot, dir: dir, "removed expired cache dir"
if err?
logger.error err: err, dir: dir, "cache remove error"
cb(err, result)
async.eachSeries toRemove, (dir, cb) ->
removeDir dir, cb
, callback

View file

@ -37,7 +37,7 @@ module.exports = OutputFileFinder =
_callback(error, fileList)
_callback = () ->
args = [directory, "-type", "f"]
args = [directory, "-name", ".cache", "-prune", "-o", "-type", "f", "-print"]
logger.log args: args, "running find command"
proc = spawn("find", args)

View file

@ -0,0 +1,37 @@
fs = require "fs"
Path = require "path"
spawn = require("child_process").spawn
logger = require "logger-sharelatex"
_ = require "underscore"
module.exports = OutputFileOptimiser =
optimiseFile: (src, dst, callback = (error) ->) ->
# check output file (src) and see if we can optimise it, storing
# the result in the build directory (dst)
if src.match(/\.pdf$/)
OutputFileOptimiser.optimisePDF src, dst, callback
else
callback (null)
optimisePDF: (src, dst, callback = (error) ->) ->
tmpOutput = dst + '.opt'
args = ["--linearize", src, tmpOutput]
logger.log args: args, "running qpdf command"
proc = spawn("qpdf", args)
stdout = ""
proc.stdout.on "data", (chunk) ->
stdout += chunk.toString()
callback = _.once(callback) # avoid double call back for error and close event
proc.on "error", (err) ->
logger.warn {err, args}, "qpdf failed"
callback(null) # ignore the error
proc.on "close", (code) ->
if code != 0
logger.warn {code, args}, "qpdf returned error"
return callback(null) # ignore the error
fs.rename tmpOutput, dst, (err) ->
if err?
logger.warn {tmpOutput, dst}, "failed to rename output of qpdf command"
callback(null) # ignore the error

View file

@ -0,0 +1,24 @@
Path = require("path")
fs = require("fs")
Settings = require("settings-sharelatex")
logger = require("logger-sharelatex")
url = require "url"
module.exports = ForbidSymlinks = (staticFn, root, options) ->
expressStatic = staticFn root, options
basePath = Path.resolve(root)
return (req, res, next) ->
path = url.parse(req.url)?.pathname
requestedFsPath = Path.normalize("#{basePath}/#{path}")
fs.realpath requestedFsPath, (err, realFsPath)->
if err?
logger.warn err:err, requestedFsPath:requestedFsPath, realFsPath:realFsPath, path: req.params[0], project_id: req.params.project_id, "error checking file access"
if err.code == 'ENOENT'
return res.sendStatus(404)
else
return res.sendStatus(500)
else if requestedFsPath != realFsPath
logger.warn requestedFsPath:requestedFsPath, realFsPath:realFsPath, path: req.params[0], project_id: req.params.project_id, "trying to access a different file (symlink), aborting"
return res.sendStatus(404)
else
expressStatic(req, res, next)

View file

@ -1,17 +0,0 @@
Path = require("path")
fs = require("fs")
Settings = require("settings-sharelatex")
logger = require("logger-sharelatex")
module.exports = (req, res, next)->
basePath = Path.resolve("#{Settings.path.compilesDir}/#{req.params.project_id}")
requestedFsPath = Path.normalize("#{basePath}/#{req.params[0]}")
fs.realpath requestedFsPath, (err, realFsPath)->
if err?
return res.send(500)
else if requestedFsPath != realFsPath
logger.warn requestedFsPath:requestedFsPath, realFsPath:realFsPath, path: req.params[0], project_id: req.params.project_id, "trying to access a different file (symlink), aborting"
return res.send(404)
else
return next()

View file

@ -21,7 +21,9 @@
"smoke-test-sharelatex": "git+https://github.com/sharelatex/smoke-test-sharelatex.git#v1.0.0",
"sqlite3": "~2.2.0",
"express": "^4.2.0",
"body-parser": "^1.2.0"
"body-parser": "^1.2.0",
"fs-extra": "^0.16.3",
"underscore": "^1.8.2"
},
"devDependencies": {
"mocha": "1.10.0",

View file

@ -36,9 +36,11 @@ describe "CompileController", ->
@output_files = [{
path: "output.pdf"
type: "pdf"
build: 1234
}, {
path: "output.log"
type: "log"
build: 1234
}]
@RequestParser.parse = sinon.stub().callsArgWith(1, null, @request)
@ProjectPersistenceManager.markProjectAsJustAccessed = sinon.stub().callsArg(1)
@ -73,6 +75,7 @@ describe "CompileController", ->
outputFiles: @output_files.map (file) =>
url: "#{@Settings.apis.clsi.url}/project/#{@project_id}/output/#{file.path}"
type: file.type
build: file.build
)
.should.equal true

View file

@ -12,6 +12,7 @@ describe "CompileManager", ->
"./LatexRunner": @LatexRunner = {}
"./ResourceWriter": @ResourceWriter = {}
"./OutputFileFinder": @OutputFileFinder = {}
"./OutputCacheManager": @OutputCacheManager = {}
"settings-sharelatex": @Settings = { path: compilesDir: "/compiles/dir" }
"logger-sharelatex": @logger = { log: sinon.stub() }
"child_process": @child_process = {}
@ -26,6 +27,15 @@ describe "CompileManager", ->
path: "output.pdf"
type: "pdf"
}]
@build_files = [{
path: "output.log"
type: "log"
build: 1234
}, {
path: "output.pdf"
type: "pdf"
build: 1234
}]
@request =
resources: @resources = "mock-resources"
rootResourcePath: @rootResourcePath = "main.tex"
@ -37,6 +47,7 @@ describe "CompileManager", ->
@ResourceWriter.syncResourcesToDisk = sinon.stub().callsArg(3)
@LatexRunner.runLatex = sinon.stub().callsArg(2)
@OutputFileFinder.findOutputFiles = sinon.stub().callsArgWith(2, null, @output_files)
@OutputCacheManager.saveOutputFiles = sinon.stub().callsArgWith(2, null, @build_files)
@CompileManager.doCompile @request, @callback
it "should write the resources to disk", ->
@ -60,7 +71,8 @@ describe "CompileManager", ->
.should.equal true
it "should return the output files", ->
@callback.calledWith(null, @output_files).should.equal true
console.log 'output_files', @build_files
@callback.calledWith(null, @build_files).should.equal true
describe "clearProject", ->
describe "succesfully", ->

View file

@ -0,0 +1,82 @@
should = require('chai').should()
SandboxedModule = require('sandboxed-module')
assert = require('assert')
path = require('path')
sinon = require('sinon')
modulePath = path.join __dirname, "../../../app/js/StaticServerForbidSymlinks"
expect = require("chai").expect
describe "StaticServerForbidSymlinks", ->
beforeEach ->
@settings =
path:
compilesDir: "/compiles/here"
@fs = {}
@ForbidSymlinks = SandboxedModule.require modulePath, requires:
"settings-sharelatex":@settings
"logger-sharelatex":
log:->
warn:->
"fs":@fs
@dummyStatic = (rootDir, options) ->
return (req, res, next) ->
# console.log "dummyStatic serving file", rootDir, "called with", req.url
# serve it
next()
@StaticServerForbidSymlinks = @ForbidSymlinks @dummyStatic, @settings.path.compilesDir
@req =
params:
project_id:"12345"
@res = {}
@req.url = "/12345/output.pdf"
describe "sending a normal file through", ->
beforeEach ->
@fs.realpath = sinon.stub().callsArgWith(1, null, "#{@settings.path.compilesDir}/#{@req.params.project_id}/output.pdf")
it "should call next", (done)->
@res.sendStatus = (resCode)->
resCode.should.equal 200
done()
@StaticServerForbidSymlinks @req, @res, done
describe "with a missing file", ->
beforeEach ->
@fs.realpath = sinon.stub().callsArgWith(1, {code: 'ENOENT'}, "#{@settings.path.compilesDir}/#{@req.params.project_id}/unknown.pdf")
it "should send a 404", (done)->
@res.sendStatus = (resCode)->
resCode.should.equal 404
done()
@StaticServerForbidSymlinks @req, @res
describe "with a symlink file", ->
beforeEach ->
@fs.realpath = sinon.stub().callsArgWith(1, null, "/etc/#{@req.params.project_id}/output.pdf")
it "should send a 404", (done)->
@res.sendStatus = (resCode)->
resCode.should.equal 404
done()
@StaticServerForbidSymlinks @req, @res
describe "with an error from fs.realpath", ->
beforeEach ->
@fs.realpath = sinon.stub().callsArgWith(1, "error")
it "should send a 500", (done)->
@res.sendStatus = (resCode)->
resCode.should.equal 500
done()
@StaticServerForbidSymlinks @req, @res

View file

@ -1,60 +0,0 @@
should = require('chai').should()
SandboxedModule = require('sandboxed-module')
assert = require('assert')
path = require('path')
sinon = require('sinon')
modulePath = path.join __dirname, "../../../app/js/SymlinkCheckerMiddlewear"
expect = require("chai").expect
describe "SymlinkCheckerMiddlewear", ->
beforeEach ->
@settings =
path:
compilesDir: "/compiles/here"
@fs = {}
@SymlinkCheckerMiddlewear = SandboxedModule.require modulePath, requires:
"settings-sharelatex":@settings
"logger-sharelatex":
log:->
warn:->
"fs":@fs
@req =
params:
project_id:"12345"
@res = {}
@req.params[0]= "output.pdf"
describe "sending a normal file through", ->
beforeEach ->
@fs.realpath = sinon.stub().callsArgWith(1, null, "#{@settings.path.compilesDir}/#{@req.params.project_id}/output.pdf")
it "should call next", (done)->
@SymlinkCheckerMiddlewear @req, @res, done
describe "with a symlink file", ->
beforeEach ->
@fs.realpath = sinon.stub().callsArgWith(1, null, "/etc/#{@req.params.project_id}/output.pdf")
it "should send a 404", (done)->
@res.send = (resCode)->
resCode.should.equal 404
done()
@SymlinkCheckerMiddlewear @req, @res
describe "with an error from fs.realpath", ->
beforeEach ->
@fs.realpath = sinon.stub().callsArgWith(1, "error")
it "should send a 500", (done)->
@res.send = (resCode)->
resCode.should.equal 500
done()
@SymlinkCheckerMiddlewear @req, @res