Merge pull request #66 from sharelatex/bg-compile-from-redis

Write files incrementally
Brian Gough 2017-08-23 15:35:56 +01:00 committed by GitHub
commit 43f424295e
12 changed files with 272 additions and 15 deletions


@@ -4,6 +4,7 @@ Settings = require "settings-sharelatex"
Metrics = require "./Metrics"
ProjectPersistenceManager = require "./ProjectPersistenceManager"
logger = require "logger-sharelatex"
Errors = require "./Errors"
module.exports = CompileController =
compile: (req, res, next = (error) ->) ->
@@ -15,7 +16,10 @@ module.exports = CompileController =
ProjectPersistenceManager.markProjectAsJustAccessed request.project_id, (error) ->
return next(error) if error?
CompileManager.doCompile request, (error, outputFiles = []) ->
if error?.terminated
if error instanceof Errors.FilesOutOfSyncError
code = 409 # Http 409 Conflict
status = "retry"
else if error?.terminated
status = "terminated"
else if error?.validate
status = "validation-#{error.validate}"

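With this change, an incremental compile whose sync state no longer matches is answered with HTTP 409 Conflict and status "retry". A minimal sketch of how a caller could react, assuming the usual {compile: {status}} response shape; compileWithFallback and the injected sendCompileRequest helper are hypothetical and not part of this commit:

compileWithFallback = (sendCompileRequest, incrementalRequest, fullRequest, callback = (error, response) ->) ->
  # try the cheap incremental compile first
  sendCompileRequest incrementalRequest, (error, response) ->
    return callback(error) if error?
    if response?.compile?.status is "retry"
      # the CLSI rejected the incremental update (409), so resend everything as a full compile
      sendCompileRequest fullRequest, callback
    else
      callback(null, response)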

@@ -31,8 +31,12 @@ module.exports = CompileManager =
timer = new Metrics.Timer("write-to-disk")
logger.log project_id: request.project_id, user_id: request.user_id, "syncing resources to disk"
ResourceWriter.syncResourcesToDisk request.project_id, request.resources, compileDir, (error) ->
if error?
ResourceWriter.syncResourcesToDisk request, compileDir, (error, resourceList) ->
# NOTE: resourceList is insecure; it should only be used to exclude files from the output list
if error? and error instanceof Errors.FilesOutOfSyncError
logger.warn project_id: request.project_id, user_id: request.user_id, "files out of sync, please retry"
return callback(error)
else if error?
logger.err err:error, project_id: request.project_id, user_id: request.user_id, "error writing resources to disk"
return callback(error)
logger.log project_id: request.project_id, user_id: request.user_id, time_taken: Date.now() - timer.start, "written files to disk"
@@ -45,7 +49,7 @@ module.exports = CompileManager =
callback()
createTikzFileIfRequired = (callback) ->
if TikzManager.needsOutputFile(request.rootResourcePath, request.resources)
if TikzManager.needsOutputFile(request.rootResourcePath, resourceList)
TikzManager.injectOutputFile compileDir, request.rootResourcePath, callback
else
callback()
@@ -91,7 +95,7 @@ module.exports = CompileManager =
error.validate = "fail"
# compile was killed by user, was a validation, or a compile which failed validation
if error?.terminated or error?.validate
OutputFileFinder.findOutputFiles request.resources, compileDir, (err, outputFiles) ->
OutputFileFinder.findOutputFiles resourceList, compileDir, (err, outputFiles) ->
return callback(err) if err?
callback(error, outputFiles) # return output files so user can check logs
return
@@ -111,7 +115,7 @@ module.exports = CompileManager =
if stats?["latex-runs"] > 0 and timings?["cpu-time"] > 0
Metrics.timing("run-compile-cpu-time-per-pass", timings["cpu-time"] / stats["latex-runs"])
OutputFileFinder.findOutputFiles request.resources, compileDir, (error, outputFiles) ->
OutputFileFinder.findOutputFiles resourceList, compileDir, (error, outputFiles) ->
return callback(error) if error?
OutputCacheManager.saveOutputFiles outputFiles, compileDir, (error, newOutputFiles) ->
callback null, newOutputFiles

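The resourceList returned by syncResourcesToDisk now replaces request.resources in the calls to TikzManager and OutputFileFinder, so an incremental compile uses the full stored list, not just the changed files, when deciding what counts as an output file. A simplified illustration of that exclusion idea, assuming nothing about the actual OutputFileFinder internals:

identifyOutputFiles = (resourceList, filesOnDisk) ->
  # anything on disk that is not a known input resource is treated as compiler output
  inputPaths = {}
  inputPaths[resource.path] = true for resource in resourceList
  (path for path in filesOnDisk when not inputPaths[path])

# e.g. identifyOutputFiles([{path: "main.tex"}], ["main.tex", "output.pdf", "output.log"])
# returns ["output.pdf", "output.log"]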

@@ -5,6 +5,9 @@ module.exports = DraftModeManager =
injectDraftMode: (filename, callback = (error) ->) ->
fs.readFile filename, "utf8", (error, content) ->
return callback(error) if error?
# avoid adding draft mode more than once
if content?.indexOf("\\documentclass\[draft") >= 0
return callback()
modified_content = DraftModeManager._injectDraftOption content
logger.log {
content: content.slice(0,1024), # \documentclass is normally very near the top

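The new guard keeps the draft option from being injected twice when the same content is processed again. As a hedged illustration only (_injectDraftOption itself is outside this diff), the injection amounts to something like:

injectDraftOptionSketch = (content) ->
  # if \documentclass already has an option list, prepend draft; otherwise add one
  withOptions = content.replace(/\\documentclass\[/, "\\documentclass[draft,")
  return withOptions if withOptions isnt content
  content.replace(/\\documentclass\{/, "\\documentclass[draft]{")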

@@ -5,6 +5,13 @@ NotFoundError = (message) ->
return error
NotFoundError.prototype.__proto__ = Error.prototype
FilesOutOfSyncError = (message) ->
error = new Error(message)
error.name = "FilesOutOfSyncError"
error.__proto__ = FilesOutOfSyncError.prototype
return error
FilesOutOfSyncError.prototype.__proto__ = Error.prototype
module.exports = Errors =
NotFoundError: NotFoundError
FilesOutOfSyncError: FilesOutOfSyncError

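A short usage note: rewiring __proto__ keeps instanceof working even though the object starts life as a plain Error, which is exactly what the CompileController change above relies on.

Errors = require "./Errors"   # same relative path used elsewhere in this diff

error = new Errors.FilesOutOfSyncError("invalid state for incremental update")
console.log error instanceof Errors.FilesOutOfSyncError   # true -> mapped to a 409 "retry" response
console.log error instanceof Error                        # true -> still behaves as a normal Error
console.log error.name                                    # "FilesOutOfSyncError"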

@@ -32,6 +32,27 @@ module.exports = RequestParser =
compile.options.check,
type: "string"
# The syncType specifies whether the request contains all
# resources (full) or only those resources to be updated
# in-place (incremental).
response.syncType = @_parseAttribute "syncType",
compile.options.syncType,
validValues: ["full", "incremental"]
type: "string"
# The syncState is an identifier passed in with the request
# which has the property that it changes when any resource is
# added, deleted, moved or renamed.
#
# on syncType full the syncState identifier is passed in and
# stored
#
# on syncType incremental the syncState identifier must match
# the stored value
response.syncState = @_parseAttribute "syncState",
compile.options.syncState,
type: "string"
if response.timeout > RequestParser.MAX_TIMEOUT
response.timeout = RequestParser.MAX_TIMEOUT
response.timeout = response.timeout * 1000 # milliseconds

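For illustration, the two sync modes could be requested as below. The overall body shape (compile.options plus compile.resources) is an assumption inferred from the attributes parsed above, and the state value and file contents are invented:

fullRequest =
  compile:
    options:
      syncType: "full"              # send every resource and store the sync state
      syncState: "a6f1cb0d"
    resources: [
      { path: "main.tex", content: "\\documentclass{article} ..." }
      { path: "chapter1.tex", content: "..." }
    ]

incrementalRequest =
  compile:
    options:
      syncType: "incremental"       # only changed files; syncState must match the stored value
      syncState: "a6f1cb0d"
    resources: [
      { path: "chapter1.tex", content: "... updated content ..." }
    ]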

@@ -0,0 +1,25 @@
Path = require "path"
fs = require "fs"
logger = require "logger-sharelatex"
settings = require("settings-sharelatex")
SafeReader = require "./SafeReader"
module.exports = ResourceListManager =
# This file is a list of the input files for the project, one per
# line, used to identify output files (i.e. files not on this list)
# when the incoming request is incremental.
RESOURCE_LIST_FILE: ".project-resource-list"
saveResourceList: (resources, basePath, callback = (error) ->) ->
resourceListFile = Path.join(basePath, @RESOURCE_LIST_FILE)
resourceList = (resource.path for resource in resources)
fs.writeFile resourceListFile, resourceList.join("\n"), callback
loadResourceList: (basePath, callback = (error) ->) ->
resourceListFile = Path.join(basePath, @RESOURCE_LIST_FILE)
# limit file to 128K, compile directory is user accessible
SafeReader.readFile resourceListFile, 128*1024, 'utf8', (err, resourceList) ->
return callback(err) if err?
resources = ({path: path} for path in resourceList?.toString()?.split("\n") or [])
callback(null, resources)

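A small usage sketch for the new module (directory and file names are illustrative): a full compile writes the list, a later incremental compile reads it back.

ResourceListManager = require "./ResourceListManager"

resources = [{path: "main.tex"}, {path: "chapter1.tex"}, {path: "images/logo.png"}]
ResourceListManager.saveResourceList resources, "/path/to/compile/dir", (error) ->
  throw error if error?
  ResourceListManager.loadResourceList "/path/to/compile/dir", (error, loaded) ->
    throw error if error?
    console.log (resource.path for resource in loaded)   # the same paths, round-tripped via the dot-file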

@@ -0,0 +1,45 @@
Path = require "path"
fs = require "fs"
logger = require "logger-sharelatex"
settings = require("settings-sharelatex")
Errors = require "./Errors"
SafeReader = require "./SafeReader"
module.exports = ResourceStateManager =
# The sync state is an identifier which must match for an
# incremental update to be allowed.
#
# The initial value is passed in and stored on a full
# compile.
#
# Subsequent incremental compiles must come with the same value - if
# not they will be rejected with a 409 Conflict response.
#
# An incremental compile can only update existing files with new
# content. The sync state identifier must change if any docs or
# files are moved, added, deleted or renamed.
SYNC_STATE_FILE: ".project-sync-state"
saveProjectStateHash: (state, basePath, callback) ->
stateFile = Path.join(basePath, @SYNC_STATE_FILE)
if not state? # remove the file if no state passed in
logger.log state:state, basePath:basePath, "clearing sync state"
fs.unlink stateFile, (err) ->
if err? and err.code isnt 'ENOENT'
return callback(err)
else
return callback()
else
logger.log state:state, basePath:basePath, "writing sync state"
fs.writeFile stateFile, state, {encoding: 'ascii'}, callback
checkProjectStateHashMatches: (state, basePath, callback) ->
stateFile = Path.join(basePath, @SYNC_STATE_FILE)
SafeReader.readFile stateFile, 64, 'ascii', (err, oldState) ->
return callback(err) if err?
if state isnt oldState
return callback new Errors.FilesOutOfSyncError("invalid state for incremental update")
else
callback(null)

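Usage sketch (identifiers and paths are illustrative): a full compile stores the caller's identifier, and a later incremental compile quoting a different identifier is refused with a FilesOutOfSyncError, which surfaces as the 409 response in CompileController.

ResourceStateManager = require "./ResourceStateManager"

ResourceStateManager.saveProjectStateHash "a6f1cb0d", "/path/to/compile/dir", (error) ->
  throw error if error?
  ResourceStateManager.checkProjectStateHashMatches "a6f1cb0d", "/path/to/compile/dir", (error) ->
    console.log "state matches, incremental update allowed" unless error?
  ResourceStateManager.checkProjectStateHashMatches "deadbeef", "/path/to/compile/dir", (error) ->
    console.log error.name if error?   # "FilesOutOfSyncError"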

@@ -4,6 +4,8 @@ fs = require "fs"
async = require "async"
mkdirp = require "mkdirp"
OutputFileFinder = require "./OutputFileFinder"
ResourceStateManager = require "./ResourceStateManager"
ResourceListManager = require "./ResourceListManager"
Metrics = require "./Metrics"
logger = require "logger-sharelatex"
settings = require("settings-sharelatex")
@@ -11,7 +13,36 @@ settings = require("settings-sharelatex")
parallelFileDownloads = settings.parallelFileDownloads or 1
module.exports = ResourceWriter =
syncResourcesToDisk: (project_id, resources, basePath, callback = (error) ->) ->
syncResourcesToDisk: (request, basePath, callback = (error, resourceList) ->) ->
if request.syncType is "incremental"
ResourceStateManager.checkProjectStateHashMatches request.syncState, basePath, (error) ->
return callback(error) if error?
ResourceListManager.loadResourceList basePath, (error, resourceList) ->
return callback(error) if error?
ResourceWriter._removeExtraneousFiles resourceList, basePath, (error) =>
return callback(error) if error?
ResourceWriter.saveIncrementalResourcesToDisk request.project_id, request.resources, basePath, (error) ->
return callback(error) if error?
callback(null, resourceList)
else
@saveAllResourcesToDisk request.project_id, request.resources, basePath, (error) ->
return callback(error) if error?
ResourceStateManager.saveProjectStateHash request.syncState, basePath, (error) ->
return callback(error) if error?
ResourceListManager.saveResourceList request.resources, basePath, (error) =>
return callback(error) if error?
callback(null, request.resources)
saveIncrementalResourcesToDisk: (project_id, resources, basePath, callback = (error) ->) ->
@_createDirectory basePath, (error) =>
return callback(error) if error?
jobs = for resource in resources
do (resource) =>
(callback) => @_writeResourceToDisk(project_id, resource, basePath, callback)
async.parallelLimit jobs, parallelFileDownloads, callback
saveAllResourcesToDisk: (project_id, resources, basePath, callback = (error) ->) ->
@_createDirectory basePath, (error) =>
return callback(error) if error?
@_removeExtraneousFiles resources, basePath, (error) =>
@@ -48,6 +79,8 @@ module.exports = ResourceWriter =
should_delete = true
if path.match(/^output\./) or path.match(/\.aux$/) or path.match(/^cache\//) # knitr cache
should_delete = false
if path in ['.project-resource-list', '.project-sync-state']
should_delete = false
if path == "output.pdf" or path == "output.dvi" or path == "output.log" or path == "output.xdv"
should_delete = true
if path == "output.tex" # created by TikzManager if present in output files

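Call-site sketch for the changed signature (values are illustrative, and the {path, content} resource shape is an assumption): the whole parsed request is passed in, and the callback now also receives the effective resource list, i.e. the stored list on an incremental sync or the incoming resources on a full sync.

ResourceWriter = require "./ResourceWriter"

request =
  project_id: "project-123"
  syncType: "incremental"
  syncState: "a6f1cb0d"
  resources: [{path: "chapter1.tex", content: "... updated content ..."}]

ResourceWriter.syncResourcesToDisk request, "/path/to/compile/dir", (error, resourceList) ->
  return console.error(error) if error?
  console.log "synced; #{resourceList.length} resources known for this project"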

@@ -0,0 +1,24 @@
fs = require "fs"
module.exports = SafeReader =
# safely read up to size bytes from a file and return result as a
# string
readFile: (file, size, encoding, callback = (error, result) ->) ->
fs.open file, 'r', (err, fd) ->
return callback() if err? and err.code is 'ENOENT'
return callback(err) if err?
# safely return always closing the file
callbackWithClose = (err, result) ->
fs.close fd, (err1) ->
return callback(err) if err?
return callback(err1) if err1?
callback(null, result)
buff = new Buffer(size, 0) # fill with zeros
fs.read fd, buff, 0, buff.length, 0, (err, bytesRead, buffer) ->
return callbackWithClose(err) if err?
result = buffer.toString(encoding, 0, bytesRead)
callbackWithClose(null, result)

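Usage sketch: read at most the first 64 bytes of a file that lives in a user-writable directory; a missing file calls back with neither error nor result. The path is illustrative.

SafeReader = require "./SafeReader"

SafeReader.readFile "/path/to/compile/dir/.project-sync-state", 64, "ascii", (error, contents) ->
  throw error if error?
  console.log "stored sync state:", contents   # undefined if the file does not exist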

@@ -51,7 +51,7 @@ describe "CompileManager", ->
@env = {}
@Settings.compileDir = "compiles"
@compileDir = "#{@Settings.path.compilesDir}/#{@project_id}-#{@user_id}"
@ResourceWriter.syncResourcesToDisk = sinon.stub().callsArg(3)
@ResourceWriter.syncResourcesToDisk = sinon.stub().callsArgWith(2, null, @resources)
@LatexRunner.runLatex = sinon.stub().callsArg(2)
@OutputFileFinder.findOutputFiles = sinon.stub().callsArgWith(2, null, @output_files)
@OutputCacheManager.saveOutputFiles = sinon.stub().callsArgWith(2, null, @build_files)
@@ -64,7 +64,7 @@ describe "CompileManager", ->
it "should write the resources to disk", ->
@ResourceWriter.syncResourcesToDisk
.calledWith(@project_id, @resources, @compileDir)
.calledWith(@request, @compileDir)
.should.equal true
it "should run LaTeX", ->


@@ -242,3 +242,13 @@ describe "RequestParser", ->
it "should return an error", ->
@callback.calledWith("relative path in root resource")
.should.equal true
describe "with an unknown syncType", ->
beforeEach ->
@validRequest.compile.options.syncType = "unexpected"
@RequestParser.parse @validRequest, @callback
@data = @callback.args[0][1]
it "should return an error", ->
@callback.calledWith("syncType attribute should be one of: full, incremental")
.should.equal true


@@ -7,7 +7,11 @@ path = require "path"
describe "ResourceWriter", ->
beforeEach ->
@ResourceWriter = SandboxedModule.require modulePath, requires:
"fs": @fs = { mkdir: sinon.stub().callsArg(1) }
"fs": @fs =
mkdir: sinon.stub().callsArg(1)
unlink: sinon.stub().callsArg(1)
"./ResourceListManager": @ResourceListManager = {}
"./ResourceStateManager": @ResourceStateManager = {}
"wrench": @wrench = {}
"./UrlCache" : @UrlCache = {}
"mkdirp" : @mkdirp = sinon.stub().callsArg(1)
@@ -20,7 +24,7 @@ describe "ResourceWriter", ->
@basePath = "/path/to/write/files/to"
@callback = sinon.stub()
describe "syncResourcesToDisk", ->
describe "syncResourcesToDisk on a full request", ->
beforeEach ->
@resources = [
"resource-1-mock"
@@ -29,7 +33,62 @@ ]
]
@ResourceWriter._writeResourceToDisk = sinon.stub().callsArg(3)
@ResourceWriter._removeExtraneousFiles = sinon.stub().callsArg(2)
@ResourceWriter.syncResourcesToDisk(@project_id, @resources, @basePath, @callback)
@ResourceStateManager.checkProjectStateHashMatches = sinon.stub().callsArg(2)
@ResourceStateManager.saveProjectStateHash = sinon.stub().callsArg(2)
@ResourceListManager.saveResourceList = sinon.stub().callsArg(2)
@ResourceListManager.loadResourceList = sinon.stub().callsArg(1)
@ResourceWriter.syncResourcesToDisk({
project_id: @project_id
syncState: @syncState = "0123456789abcdef"
resources: @resources
}, @basePath, @callback)
it "should remove old files", ->
@ResourceWriter._removeExtraneousFiles
.calledWith(@resources, @basePath)
.should.equal true
it "should write each resource to disk", ->
for resource in @resources
@ResourceWriter._writeResourceToDisk
.calledWith(@project_id, resource, @basePath)
.should.equal true
it "should store the sync state", ->
@ResourceStateManager.saveProjectStateHash
.calledWith(@syncState, @basePath)
.should.equal true
it "should save the resource list", ->
@ResourceListManager.saveResourceList
.calledWith(@resources, @basePath)
.should.equal true
it "should call the callback", ->
@callback.called.should.equal true
describe "syncResourcesToDisk on an incremental update", ->
beforeEach ->
@resources = [
"resource-1-mock"
]
@ResourceWriter._writeResourceToDisk = sinon.stub().callsArg(3)
@ResourceWriter._removeExtraneousFiles = sinon.stub().callsArg(2)
@ResourceStateManager.checkProjectStateHashMatches = sinon.stub().callsArg(2)
@ResourceStateManager.saveProjectStateHash = sinon.stub().callsArg(2)
@ResourceListManager.saveResourceList = sinon.stub().callsArg(2)
@ResourceListManager.loadResourceList = sinon.stub().callsArgWith(1, null, @resources)
@ResourceWriter.syncResourcesToDisk({
project_id: @project_id,
syncType: "incremental",
syncState: @syncState = "1234567890abcdef",
resources: @resources
}, @basePath, @callback)
it "should check the sync state matches", ->
@ResourceStateManager.checkProjectStateHashMatches
.calledWith(@syncState, @basePath)
.should.equal true
it "should remove old files", ->
@ResourceWriter._removeExtraneousFiles
@@ -45,6 +104,28 @@ it "should call the callback", ->
it "should call the callback", ->
@callback.called.should.equal true
describe "syncResourcesToDisk on an incremental update when the state does not match", ->
beforeEach ->
@resources = [
"resource-1-mock"
]
@ResourceStateManager.checkProjectStateHashMatches = sinon.stub().callsArgWith(2, @error = new Error())
@ResourceWriter.syncResourcesToDisk({
project_id: @project_id,
syncType: "incremental",
syncState: @syncState = "1234567890abcdef",
resources: @resources
}, @basePath, @callback)
it "should check whether the sync state matches", ->
@ResourceStateManager.checkProjectStateHashMatches
.calledWith(@syncState, @basePath)
.should.equal true
it "should call the callback with an error", ->
@callback.calledWith(@error).should.equal true
describe "_removeExtraneousFiles", ->
beforeEach ->
@output_files = [{