Merge pull request #1089 from overleaf/bg-compute-filestore-hash

compute filestore hash

GitOrigin-RevId: 23a909d2c2ea7aba3abfdb8f0c060e414a17e52e
This commit is contained in:
Brian Gough 2019-05-13 09:39:07 +01:00 committed by sharelatex
parent c989179517
commit 07b7566466
10 changed files with 134 additions and 69 deletions

View file

@ -208,6 +208,7 @@ module.exports = DocumentUpdaterHandler =
pathname: newEntity.path
docLines: newEntity.docLines
url: newEntity.url
hash: newEntity.file?.hash
else if newEntity.path != oldEntity.path
# entity renamed
updates.push

View file

@ -0,0 +1,35 @@
crypto = require "crypto"
logger = require("logger-sharelatex")
fs = require("fs")
_ = require("underscore")
module.exports = FileHashManager =
  # Computes a hex-encoded SHA-1 digest of the file at `filePath`.
  # The digest covers a git blob header ('blob ' + size + NUL byte, with the
  # size taken from fs.stat) followed by the streamed file contents.
  #
  # callback(error) is invoked when the stat or the read stream fails;
  # callback(null, hashValue) is invoked once the digest can be read.
  computeHash: (filePath, callback = (error, hashValue) ->) ->
    # only the first outcome is reported, later invocations are ignored
    done = _.once(callback)

    fs.stat filePath, (statError, stats) ->
      return done(statError) if statError?

      # header format taken from v1/history/storage/lib/blob_hash.js
      gitBlobHeader = 'blob ' + stats.size + '\x00'

      source = fs.createReadStream(filePath)
      source.on 'error', (readError) ->
        logger.err {filePath: filePath, err: readError}, "error opening file in computeHash"
        done(readError)

      digester = crypto.createHash("sha1")
      digester.setEncoding('hex')
      # the header has to be hashed before any file content is piped in
      digester.update(gitBlobHeader)
      digester.on 'readable', () ->
        digest = digester.read()
        # read() may yield nothing yet; wait for the next 'readable' event
        return unless digest?
        done(null, digest.toString('hex'))

      source.pipe(digester)

View file

@ -3,6 +3,8 @@ fs = require("fs")
request = require("request")
settings = require("settings-sharelatex")
Async = require('async')
FileHashManager = require("./FileHashManager")
File = require('../../models/File').File
oneMinInMs = 60 * 1000
fiveMinsInMs = oneMinInMs * 5
@ -11,53 +13,60 @@ module.exports = FileStoreHandler =
RETRY_ATTEMPTS: 3
uploadFileFromDisk: (project_id, file_id, fsPath, callback)->
uploadFileFromDisk: (project_id, file_args, fsPath, callback = (error, url, fileRef) ->)->
fs.lstat fsPath, (err, stat)->
if err?
logger.err err:err, project_id:project_id, file_id:file_id, fsPath:fsPath, "error stating file"
logger.err err:err, project_id:project_id, file_args:file_args, fsPath:fsPath, "error stating file"
callback(err)
if !stat?
logger.err project_id:project_id, file_id:file_id, fsPath:fsPath, "stat is not available, can not check file from disk"
logger.err project_id:project_id, file_args:file_args, fsPath:fsPath, "stat is not available, can not check file from disk"
return callback(new Error("error getting stat, not available"))
if !stat.isFile()
logger.log project_id:project_id, file_id:file_id, fsPath:fsPath, "tried to upload symlink, not contining"
logger.log project_id:project_id, file_args:file_args, fsPath:fsPath, "tried to upload symlink, not contining"
return callback(new Error("can not upload symlink"))
Async.retry FileStoreHandler.RETRY_ATTEMPTS, (cb) ->
FileStoreHandler._doUploadFileFromDisk project_id, file_id, fsPath, cb
, (err, url) ->
Async.retry FileStoreHandler.RETRY_ATTEMPTS, (cb, results) ->
FileStoreHandler._doUploadFileFromDisk project_id, file_args, fsPath, cb
, (err, result) ->
if err?
logger.err {err, project_id, file_id}, "Error uploading file, retries failed"
callback(err, url)
logger.err {err, project_id, file_args}, "Error uploading file, retries failed"
return callback(err)
callback(err, result.url, result.fileRef)
_doUploadFileFromDisk: (project_id, file_id, fsPath, callback) ->
_doUploadFileFromDisk: (project_id, file_args, fsPath, callback = (err, result)->) ->
_cb = callback
callback = (err, url) ->
callback = (err, result...) ->
callback = -> # avoid double callbacks
_cb(err, url)
_cb(err, result...)
logger.log project_id:project_id, file_id:file_id, fsPath:fsPath, "uploading file from disk"
readStream = fs.createReadStream(fsPath)
readStream.on "error", (err)->
logger.err err:err, project_id:project_id, file_id:file_id, fsPath:fsPath, "something went wrong on the read stream of uploadFileFromDisk"
callback err
readStream.on "open", () ->
url = FileStoreHandler._buildUrl(project_id, file_id)
opts =
method: "post"
uri: url
timeout:fiveMinsInMs
writeStream = request(opts)
writeStream.on "error", (err)->
logger.err err:err, project_id:project_id, file_id:file_id, fsPath:fsPath, "something went wrong on the write stream of uploadFileFromDisk"
FileHashManager.computeHash fsPath, (err, hashValue) ->
return callback(err) if err?
fileRef = new File(Object.assign({}, file_args, {hash: hashValue}))
file_id = fileRef._id
logger.log project_id:project_id, file_id:file_id, fsPath:fsPath, hash: hashValue, fileRef:fileRef, "uploading file from disk"
readStream = fs.createReadStream(fsPath)
readStream.on "error", (err)->
logger.err err:err, project_id:project_id, file_id:file_id, fsPath:fsPath, "something went wrong on the read stream of uploadFileFromDisk"
callback err
writeStream.on 'response', (response) ->
if response.statusCode not in [200, 201]
err = new Error("non-ok response from filestore for upload: #{response.statusCode}")
logger.err {err, statusCode: response.statusCode}, "error uploading to filestore"
callback(err)
else
callback(null, url)
readStream.pipe writeStream
readStream.on "open", () ->
url = FileStoreHandler._buildUrl(project_id, file_id)
opts =
method: "post"
uri: url
timeout:fiveMinsInMs
headers:
"X-File-Hash-From-Web": hashValue # send the hash to the filestore as a custom header so it can be checked
writeStream = request(opts)
writeStream.on "error", (err)->
logger.err err:err, project_id:project_id, file_id:file_id, fsPath:fsPath, "something went wrong on the write stream of uploadFileFromDisk"
callback err
writeStream.on 'response', (response) ->
if response.statusCode not in [200, 201]
err = new Error("non-ok response from filestore for upload: #{response.statusCode}")
logger.err {err, statusCode: response.statusCode}, "error uploading to filestore"
callback(err)
else
callback(null, {url, fileRef}) # have to pass back an object because async.retry only accepts a single result argument
readStream.pipe writeStream
getFileStream: (project_id, file_id, query, callback)->
logger.log project_id:project_id, file_id:file_id, query:query, "getting file stream from file store"

View file

@ -64,6 +64,7 @@ module.exports = ProjectEntityMongoUpdateHandler = self =
set["#{path.mongo}.created"] = new Date()
set["#{path.mongo}.linkedFileData"] = newFileRef.linkedFileData
inc["#{path.mongo}.rev"] = 1
set["#{path.mongo}.hash"] = newFileRef.hash
update =
"$inc": inc
"$set": set
@ -377,6 +378,7 @@ module.exports = ProjectEntityMongoUpdateHandler = self =
_id: fileRef._id
name: fileRef.name
linkedFileData: fileRef.linkedFileData
hash: fileRef.hash
deletedAt: new Date()
}
}

View file

@ -169,18 +169,17 @@ module.exports = ProjectEntityUpdateHandler = self =
return callback(error) if error?
callback null, doc, folder_id
_uploadFile: (project_id, folder_id, fileName, fsPath, linkedFileData, callback = (error, fileRef, fileStoreUrl) ->)->
_uploadFile: (project_id, folder_id, fileName, fsPath, linkedFileData, callback = (error, fileStoreUrl, fileRef) ->)->
if not SafePath.isCleanFilename fileName
return callback new Errors.InvalidNameError("invalid element name")
fileRef = new File(
fileArgs =
name: fileName
linkedFileData: linkedFileData
)
FileStoreHandler.uploadFileFromDisk project_id, fileRef._id, fsPath, (err, fileStoreUrl)->
FileStoreHandler.uploadFileFromDisk project_id, fileArgs, fsPath, (err, fileStoreUrl, fileRef)->
if err?
logger.err err:err, project_id: project_id, folder_id: folder_id, file_name: fileName, fileRef:fileRef, "error uploading image to s3"
return callback(err)
callback(null, fileRef, fileStoreUrl)
callback(null, fileStoreUrl, fileRef)
_addFileAndSendToTpds: (project_id, folder_id, fileRef, callback = (error) ->)->
ProjectEntityMongoUpdateHandler.addFile project_id, folder_id, fileRef, (err, result, project) ->
@ -196,7 +195,7 @@ module.exports = ProjectEntityUpdateHandler = self =
(project_id, folder_id, fileName, fsPath, linkedFileData, userId, callback) ->
if not SafePath.isCleanFilename fileName
return callback new Errors.InvalidNameError("invalid element name")
ProjectEntityUpdateHandler._uploadFile project_id, folder_id, fileName, fsPath, linkedFileData, (error, fileRef, fileStoreUrl) ->
ProjectEntityUpdateHandler._uploadFile project_id, folder_id, fileName, fsPath, linkedFileData, (error, fileStoreUrl, fileRef) ->
return callback(error) if error?
next(project_id, folder_id, fileName, fsPath, linkedFileData, userId, fileRef, fileStoreUrl, callback)
withLock: (project_id, folder_id, fileName, fsPath, linkedFileData, userId, fileRef, fileStoreUrl, callback = (error, fileRef, folder_id) ->)->
@ -216,11 +215,10 @@ module.exports = ProjectEntityUpdateHandler = self =
beforeLock: (next) ->
(project_id, file_id, fsPath, linkedFileData, userId, callback)->
# create a new file
fileRef = new File(
fileArgs =
name: "dummy-upload-filename"
linkedFileData: linkedFileData
)
FileStoreHandler.uploadFileFromDisk project_id, fileRef._id, fsPath, (err, fileStoreUrl)->
FileStoreHandler.uploadFileFromDisk project_id, fileArgs, fsPath, (err, fileStoreUrl, fileRef)->
return callback(err) if err?
next project_id, file_id, fsPath, linkedFileData, userId, fileRef, fileStoreUrl, callback
withLock: (project_id, file_id, fsPath, linkedFileData, userId, newFileRef, fileStoreUrl, callback)->
@ -274,11 +272,10 @@ module.exports = ProjectEntityUpdateHandler = self =
if not SafePath.isCleanFilename fileName
return callback new Errors.InvalidNameError("invalid element name")
# create a new file
fileRef = new File(
fileArgs =
name: fileName
linkedFileData: linkedFileData
)
FileStoreHandler.uploadFileFromDisk project_id, fileRef._id, fsPath, (err, fileStoreUrl)->
FileStoreHandler.uploadFileFromDisk project_id, fileArgs, fsPath, (err, fileStoreUrl, fileRef)->
return callback(err) if err?
next(project_id, folder_id, fileName, fsPath, linkedFileData, userId, fileRef, fileStoreUrl, callback)
withLock: (project_id, folder_id, fileName, fsPath, linkedFileData, userId, newFileRef, fileStoreUrl, callback = (err, file, isNewFile, existingFile)->)->
@ -320,11 +317,10 @@ module.exports = ProjectEntityUpdateHandler = self =
fileName = path.basename(elementPath)
folderPath = path.dirname(elementPath)
# create a new file
fileRef = new File(
fileArgs =
name: fileName
linkedFileData: linkedFileData
)
FileStoreHandler.uploadFileFromDisk project_id, fileRef._id, fsPath, (err, fileStoreUrl)->
FileStoreHandler.uploadFileFromDisk project_id, fileArgs, fsPath, (err, fileStoreUrl, fileRef)->
return callback(err) if err?
next project_id, folderPath, fileName, fsPath, linkedFileData, userId, fileRef, fileStoreUrl, callback
withLock: (project_id, folderPath, fileName, fsPath, linkedFileData, userId, fileRef, fileStoreUrl, callback) ->

View file

@ -9,6 +9,7 @@ FileSchema = new Schema
created : type:Date, default: () -> new Date()
rev : {type:Number, default:0}
linkedFileData: { type: Schema.Types.Mixed }
hash : type:String
mongoose.model 'File', FileSchema
exports.File = mongoose.model 'File'

View file

@ -470,6 +470,7 @@ describe 'DocumentUpdaterHandler', ->
pathname: "/foo"
docLines: 'a\nb'
url: undefined
hash: undefined
]
@handler.updateProjectStructure @project_id, @projectHistoryId, @user_id, @changes, () =>
@ -485,7 +486,7 @@ describe 'DocumentUpdaterHandler', ->
@fileId = new ObjectId()
@changes = {
newFiles: [
{ path: '/bar', url: 'filestore.example.com/file', file: _id: @fileId }
{ path: '/bar', url: 'filestore.example.com/file', file: {_id: @fileId, hash: "12345" }}
]
newProject: {version: @version}
}
@ -495,6 +496,7 @@ describe 'DocumentUpdaterHandler', ->
pathname: "/bar"
url: 'filestore.example.com/file'
docLines: undefined
hash: "12345"
]
@handler.updateProjectStructure @project_id, @projectHistoryId, @user_id, @changes, () =>

View file

@ -22,11 +22,23 @@ describe "FileStoreHandler", ->
@readStream = {my:"readStream", on: sinon.stub()}
@request = sinon.stub()
@settings = apis:{filestore:{url:"http//filestore.sharelatex.test"}}
@hashValue = "0123456789"
@FileModel = class File
constructor:(options)->
{@name,@hash} = options
@_id = "file_id_here"
@rev = 0
if options.linkedFileData?
@linkedFileData = options.linkedFileData
@handler = SandboxedModule.require modulePath, requires:
"settings-sharelatex":@settings
"request":@request
"logger-sharelatex" : @logger = {log:sinon.stub(), err:sinon.stub()}
"./FileHashManager" : @FileHashManager = { computeHash: sinon.stub().callsArgWith(1, null, @hashValue)}
# FIXME: need to stub File object here
"../../models/File" : File: @FileModel
"fs" : @fs
@file_args = {name: "upload-filename"}
@file_id = "file_id_here"
@project_id = "1312312312"
@fsPath = "uploads/myfile.eps"
@ -42,7 +54,7 @@ describe "FileStoreHandler", ->
on: (type, cb)->
if type == "open"
cb()
@handler.uploadFileFromDisk @project_id, @file_id, @fsPath, =>
@handler.uploadFileFromDisk @project_id, @file_args, @fsPath, =>
@fs.createReadStream.calledWith(@fsPath).should.equal true
done()
@ -55,7 +67,7 @@ describe "FileStoreHandler", ->
pipe:(o)=>
@writeStream.should.equal o
done()
@handler.uploadFileFromDisk @project_id, @file_id, @fsPath, =>
@handler.uploadFileFromDisk @project_id, @file_args, @fsPath, =>
it "should pass the correct options to request", (done)->
@fs.createReadStream.returns
@ -63,7 +75,7 @@ describe "FileStoreHandler", ->
on: (type, cb)->
if type == "open"
cb()
@handler.uploadFileFromDisk @project_id, @file_id, @fsPath, =>
@handler.uploadFileFromDisk @project_id, @file_args, @fsPath, =>
@request.args[0][0].method.should.equal "post"
@request.args[0][0].uri.should.equal @handler._buildUrl()
done()
@ -74,19 +86,21 @@ describe "FileStoreHandler", ->
on: (type, cb)->
if type == "open"
cb()
@handler.uploadFileFromDisk @project_id, @file_id, @fsPath, =>
@handler.uploadFileFromDisk @project_id, @file_args, @fsPath, =>
@handler._buildUrl.calledWith(@project_id, @file_id).should.equal true
done()
it 'should callback with the url', (done) ->
it 'should callback with the url and fileRef', (done) ->
@fs.createReadStream.returns
pipe:->
on: (type, cb)->
if type == "open"
cb()
@handler.uploadFileFromDisk @project_id, @file_id, @fsPath, (err, url) =>
@handler.uploadFileFromDisk @project_id, @file_args, @fsPath, (err, url, fileRef) =>
expect(err).to.not.exist
expect(url).to.equal(@handler._buildUrl())
expect(fileRef._id).to.equal(@file_id)
expect(fileRef.hash).to.equal(@hashValue)
done()
describe "symlink", ->
@ -97,14 +111,14 @@ describe "FileStoreHandler", ->
})
it "should not read file if it is symlink", (done)->
@handler.uploadFileFromDisk @project_id, @file_id, @fsPath, =>
@handler.uploadFileFromDisk @project_id, @file_args, @fsPath, =>
@fs.createReadStream.called.should.equal false
done()
describe "symlink", ->
it "should not read file stat returns nothing", (done)->
@fs.lstat = sinon.stub().callsArgWith(1, null, null)
@handler.uploadFileFromDisk @project_id, @file_id, @fsPath, =>
@handler.uploadFileFromDisk @project_id, @file_args, @fsPath, =>
@fs.createReadStream.called.should.equal false
done()
@ -121,7 +135,7 @@ describe "FileStoreHandler", ->
on: (type, cb)->
if type == "open"
cb()
@handler.uploadFileFromDisk @project_id, @file_id, @fsPath, (err) =>
@handler.uploadFileFromDisk @project_id, @file_args, @fsPath, (err) =>
expect(err).to.exist
expect(err).to.be.instanceof Error
expect(@fs.createReadStream.callCount).to.equal @handler.RETRY_ATTEMPTS

View file

@ -125,6 +125,7 @@ describe 'ProjectEntityMongoUpdateHandler', ->
_id: file_id
name: @file.name
linkedFileData: @file.linkedFileData
hash: @file.hash
deletedAt: new Date()
}
}
@ -138,7 +139,7 @@ describe 'ProjectEntityMongoUpdateHandler', ->
{ _id: project_id },
{
'$inc': { 'version': 1, 'file.png.rev': 1 }
'$set': { 'file.png._id': @newFile._id, 'file.png.created': new Date(), 'file.png.linkedFileData': @linkedFileData }
'$set': { 'file.png._id': @newFile._id, 'file.png.created': new Date(), 'file.png.linkedFileData': @linkedFileData, 'file.png.hash': @hash }
},
{new: true}
)

View file

@ -26,9 +26,7 @@ describe 'ProjectEntityUpdateHandler', ->
history:
id: projectHistoryId
@fileUrl = 'filestore.example.com/file'
@FileStoreHandler =
uploadFileFromDisk: sinon.stub().yields(null, @fileUrl)
copyFile: sinon.stub().yields(null, @fileUrl)
@FileStoreHandler = {}
@DocModel = class Doc
constructor:(options)->
@ -85,7 +83,7 @@ describe 'ProjectEntityUpdateHandler', ->
@oldFileRef = {name:@fileName, _id:"oldFileRef"}
@ProjectEntityMongoUpdateHandler._confirmFolder = sinon.stub().yields(folder_id)
@ProjectEntityMongoUpdateHandler._putElement = sinon.stub().yields(null, {path:{fileSystem: @fileSystemPath}})
@FileStoreHandler.copyFile = sinon.stub().yields(null, @fileUrl)
@ProjectEntityUpdateHandler.copyFileFromExistingProjectWithProject @project._id, @project, folder_id, @oldProject_id, @oldFileRef, userId, @callback
it 'should copy the file in FileStoreHandler', ->
@ -134,7 +132,7 @@ describe 'ProjectEntityUpdateHandler', ->
}
@ProjectEntityMongoUpdateHandler._confirmFolder = sinon.stub().yields(folder_id)
@ProjectEntityMongoUpdateHandler._putElement = sinon.stub().yields(null, {path:{fileSystem: @fileSystemPath}})
@FileStoreHandler.copyFile = sinon.stub().yields(null, @fileUrl)
@ProjectEntityUpdateHandler.copyFileFromExistingProjectWithProject @project._id, @project, folder_id, @oldProject_id, @oldFileRef, userId, @callback
it 'should copy the file in FileStoreHandler', ->
@ -337,13 +335,14 @@ describe 'ProjectEntityUpdateHandler', ->
@path = "/path/to/file"
@newFile = {_id: file_id, rev: 0, name: @fileName, linkedFileData: @linkedFileData}
@FileStoreHandler.uploadFileFromDisk = sinon.stub().yields(null, @fileUrl, @newFile)
@TpdsUpdateSender.addFile = sinon.stub().yields()
@ProjectEntityMongoUpdateHandler.addFile = sinon.stub().yields(null, {path: fileSystem: @path}, @project)
@ProjectEntityUpdateHandler.addFile project_id, folder_id, @fileName, @fileSystemPath, @linkedFileData, userId, @callback
it "updates the file in the filestore", () ->
@FileStoreHandler.uploadFileFromDisk
.calledWith(project_id, file_id, @fileSystemPath)
.calledWith(project_id, {name:@fileName, linkedFileData:@linkedFileData}, @fileSystemPath)
.should.equal true
it "updates the file in mongo", () ->
@ -393,19 +392,20 @@ describe 'ProjectEntityUpdateHandler', ->
beforeEach ->
# replacement file now creates a new file object
@newFileUrl = "new-file-url"
@FileStoreHandler.uploadFileFromDisk = sinon.stub().yields(null, @newFileUrl)
@FileStoreHandler.uploadFileFromDisk = sinon.stub().yields(null, @newFileUrl, @newFile)
@newFile = _id: new_file_id, name: "dummy-upload-filename", rev: 0, linkedFileData: @linkedFileData
@oldFile = _id: file_id, rev: 3
@path = "/path/to/file"
@newProject = "new project"
@FileStoreHandler.uploadFileFromDisk = sinon.stub().yields(null, @newFileUrl, @newFile)
@ProjectEntityMongoUpdateHandler._insertDeletedFileReference = sinon.stub().yields()
@ProjectEntityMongoUpdateHandler.replaceFileWithNew = sinon.stub().yields(null, @oldFile, @project, fileSystem: @path, @newProject)
@ProjectEntityUpdateHandler.replaceFile project_id, file_id, @fileSystemPath, @linkedFileData, userId, @callback
it 'uploads a new version of the file', ->
@FileStoreHandler.uploadFileFromDisk
.calledWith(project_id, new_file_id, @fileSystemPath)
.calledWith(project_id, {name:"dummy-upload-filename", linkedFileData:@linkedFileData}, @fileSystemPath)
.should.equal true
it 'replaces the file in mongo', ->
@ -432,7 +432,7 @@ describe 'ProjectEntityUpdateHandler', ->
newFiles = [
file: @newFile
path: @path
url: @newFileUrl
url: @newFileUrl
]
@DocumentUpdaterHandler.updateProjectStructure
.calledWith(project_id, projectHistoryId, userId, {oldFiles, newFiles, newProject: @newProject})
@ -514,6 +514,9 @@ describe 'ProjectEntityUpdateHandler', ->
.should.equal true
describe 'upsertFile', ->
beforeEach ->
@FileStoreHandler.uploadFileFromDisk = sinon.stub().yields(null, @fileUrl, @newFile)
describe 'upserting into an invalid folder', ->
beforeEach ->
@ProjectLocator.findElement = sinon.stub().yields()
@ -653,6 +656,7 @@ describe 'ProjectEntityUpdateHandler', ->
@folder = _id: folder_id
@file = _id: file_id
@isNewFile = true
@FileStoreHandler.uploadFileFromDisk = sinon.stub().yields(null, @fileUrl, @newFile)
@ProjectEntityUpdateHandler.mkdirp =
withoutLock: sinon.stub().yields(null, @newFolders, @folder)
@ProjectEntityUpdateHandler.upsertFile =