Merge pull request #9 from sharelatex/s3-archive

S3 archive
This commit is contained in:
Henry Oswald 2015-08-13 14:20:46 +01:00
commit 6ea284b17b
11 changed files with 508 additions and 16 deletions

View file

@ -22,6 +22,8 @@ app.get '/project/:project_id/doc/:doc_id/raw', HttpController.getRawDoc
app.post '/project/:project_id/doc/:doc_id', bodyParser.json(limit: "2mb"), HttpController.updateDoc
app.del '/project/:project_id/doc/:doc_id', HttpController.deleteDoc
app.get '/project/:project_id/archive', HttpController.archiveAllDocs
app.get '/status', (req, res)->
res.send('docstore is alive')

View file

@ -0,0 +1,84 @@
MongoManager = require "./MongoManager"
Errors = require "./Errors"
logger = require "logger-sharelatex"
_ = require "underscore"
async = require "async"
settings = require("settings-sharelatex")
request = require("request")
crypto = require("crypto")
thirtySeconds = 30 * 1000
module.exports = DocArchive =

  # Archive every doc of a project that is not already flagged `inS3`.
  #
  # Docs are handled one after another with async.series, so the first doc
  # that fails stops the run and its error is handed to `callback`.
  #
  # project_id - id of the project whose docs are archived
  # callback   - called with (err); receives a NotFoundError when
  #              MongoManager.getProjectsDocs yields no docs value
  archiveAllDocs: (project_id, callback = (err, docs) ->) ->
    MongoManager.getProjectsDocs project_id, (err, docs) ->
      if err?
        return callback(err)
      else if !docs?
        return callback new Errors.NotFoundError("No docs for project #{project_id}")
      jobs = _.map docs, (doc) ->
        (cb)->
          if doc.inS3
            return cb()
          else
            DocArchive.archiveDoc project_id, doc, cb
      async.series jobs, callback

  # Upload one doc's lines to S3 and, once the upload is verified, mark the
  # doc as archived in Mongo.
  #
  # The upload is verified by comparing the MD5 of JSON.stringify(doc.lines)
  # with the ETag header of the PUT response (surrounding quotes removed).
  #
  # project_id - id of the owning project (first half of the S3 key)
  # doc        - doc record; `_id`, `lines` and `rev` are read
  # callback   - called with (err)
  archiveDoc: (project_id, doc, callback)->
    # A doc without lines has nothing to upload. Sending it anyway would PUT
    # an empty body over whatever is stored under this key and then throw
    # while hashing, so refuse up front.
    if !doc.lines?
      return callback new Error("doc has no lines to archive")
    logger.log project_id: project_id, doc_id: doc._id, "sending doc to s3"
    options = DocArchive.buildS3Options(doc.lines, project_id+"/"+doc._id)
    request.put options, (err, res)->
      if err? || res.statusCode != 200
        logger.err err:err, res:res, project_id:project_id, doc_id: doc._id, statusCode: res?.statusCode, "something went wrong archiving doc in aws"
        return callback new Error("Error in S3 request")
      md5lines = crypto.createHash("md5").update(JSON.stringify(doc.lines)).digest("hex")
      # A response without an ETag cannot be verified; treat it as a
      # mismatch instead of throwing on `undefined.toString()`.
      etag = res.headers?.etag
      md5response = if etag? then etag.toString().replace(/\"/g, '') else null
      if md5lines != md5response
        logger.err responseMD5:md5response, linesMD5:md5lines, project_id:project_id, doc_id: doc?._id, "err in response md5 from s3"
        return callback new Error("Error in S3 md5 response")
      MongoManager.markDocAsArchived doc._id, doc.rev, (err) ->
        return callback(err) if err?
        callback()

  # Restore every archived doc of a project from S3, one at a time.
  #
  # project_id - id of the project whose docs are restored
  # callback   - called with (err); receives a NotFoundError when
  #              MongoManager.getArchivedProjectDocs yields no docs value
  unArchiveAllDocs: (project_id, callback = (err) ->) ->
    MongoManager.getArchivedProjectDocs project_id, (err, docs) ->
      if err?
        logger.err err:err, project_id:project_id, "error unarchiving all docs"
        return callback(err)
      else if !docs?
        return callback new Errors.NotFoundError("No docs for project #{project_id}")
      jobs = _.map docs, (doc) ->
        (cb)->
          # Truthiness check, matching archiveAllDocs: a doc whose flag is
          # absent or false is not in S3 and must not be fetched.
          if !doc.inS3
            return cb()
          else
            DocArchive.unarchiveDoc project_id, doc._id, cb
      async.series jobs, callback

  # Fetch one doc from S3, write its lines back into Mongo, then delete the
  # S3 object.
  #
  # project_id - id of the owning project (first half of the S3 key)
  # doc_id     - id of the doc to restore
  # callback   - called with (err); S3 failures on either the GET (expects
  #              200) or the DELETE (expects 204) are reported as
  #              NotFoundError
  unarchiveDoc: (project_id, doc_id, callback)->
    logger.log project_id: project_id, doc_id: doc_id, "getting doc from s3"
    options = DocArchive.buildS3Options(true, project_id+"/"+doc_id)
    request.get options, (err, res, lines)->
      if err? || res.statusCode != 200
        logger.err err:err, res:res, project_id:project_id, doc_id:doc_id, "something went wrong unarchiving doc from aws"
        return callback new Errors.NotFoundError("Error in S3 request")
      MongoManager.upsertIntoDocCollection project_id, doc_id.toString(), lines, (err) ->
        return callback(err) if err?
        # Only remove the S3 copy after Mongo has accepted the lines.
        logger.log project_id: project_id, doc_id: doc_id, "deleting doc from s3"
        request.del options, (err, res, body)->
          if err? || res.statusCode != 204
            logger.err err:err, res:res, project_id:project_id, doc_id:doc_id, "something went wrong deleting doc from aws"
            return callback new Errors.NotFoundError("Error in S3 request")
          callback()

  # Build the options object handed to `request` for an S3 call.
  #
  # content - value for the `json` option: the doc lines when uploading,
  #           `true` when fetching or deleting
  # key     - object key inside the bucket ("<project_id>/<doc_id>")
  #
  # Returns an object with `aws` credentials read from
  # settings.docstore.s3, a thirty second `timeout`, `json` and `uri`.
  buildS3Options: (content, key)->
    return {
      aws:
        key: settings.docstore.s3.key
        secret: settings.docstore.s3.secret
        bucket: settings.docstore.s3.bucket
      timeout: thirtySeconds
      json: content
      uri:"https://#{settings.docstore.s3.bucket}.s3.amazonaws.com/#{key}"
    }

View file

@ -3,6 +3,11 @@ Errors = require "./Errors"
logger = require "logger-sharelatex"
_ = require "underscore"
async = require "async"
settings = require("settings-sharelatex")
request = require("request")
crypto = require("crypto")
thirtySeconds = 30 * 1000
DocArchive = require "./DocArchiveManager"
module.exports = DocManager =
@ -12,20 +17,28 @@ module.exports = DocManager =
return callback(err)
else if !doc?
return callback new Errors.NotFoundError("No such doc: #{doc_id} in project #{project_id}")
callback null, doc
else if doc?.inS3
DocArchive.unarchiveDoc project_id, doc_id, (err)->
if err?
logger.err err:err, project_id:project_id, doc_id:doc_id, "error unarchiving doc"
return callback(err)
MongoManager.findDoc doc_id, callback
else
callback err, doc
getAllDocs: (project_id, callback = (error, docs) ->) ->
  # Return every doc of a project, restoring archived docs from S3 first.
  #
  # project_id - id of the project
  # callback   - called with (error, docs); receives a NotFoundError when
  #              MongoManager.getProjectsDocs yields no docs value
  DocArchive.unArchiveAllDocs project_id, (error) ->
    # A failed restore would leave docs without lines, so stop here rather
    # than return a partially restored project.
    if error?
      return callback(error)
    MongoManager.getProjectsDocs project_id, (error, docs) ->
      # The parameter is `error`; testing `err` here never matched, so
      # Mongo failures were never reported.
      if error?
        return callback(error)
      else if !docs?
        return callback new Errors.NotFoundError("No docs for project #{project_id}")
      else
        return callback(null, docs)
updateDoc: (project_id, doc_id, lines, callback = (error, modified, rev) ->) ->
MongoManager.findDoc doc_id, (err, doc)->
if err?
DocManager.getDoc project_id, doc_id, (err, doc)->
if err? and !(err instanceof Errors.NotFoundError)
logger.err project_id: project_id, doc_id: doc_id, err:err, "error getting document for update"
return callback(err)
@ -57,4 +70,3 @@ module.exports = DocManager =
MongoManager.markDocAsDeleted doc_id, (error) ->
return callback(error) if error?
callback()

View file

@ -1,5 +1,6 @@
DocManager = require "./DocManager"
logger = require "logger-sharelatex"
DocArchive = require "./DocArchiveManager"
module.exports = HttpController =
getDoc: (req, res, next = (error) ->) ->
@ -78,3 +79,10 @@ module.exports = HttpController =
_buildRawDocView: (doc)->
return (doc?.lines or []).join("\n")
archiveAllDocs: (req, res, next = (error) ->) ->
  # HTTP handler: archive all docs of the project named in the route and
  # answer 204 on success; any error is forwarded to `next`.
  {project_id} = req.params
  logger.log {project_id: project_id}, "archiving all docs"
  DocArchive.archiveAllDocs project_id, (archiveError) ->
    if archiveError?
      return next(archiveError)
    res.send 204

View file

@ -9,13 +9,21 @@ module.exports = MongoManager =
getProjectsDocs: (project_id, callback)->
db.docs.find project_id: ObjectId(project_id.toString()), {}, callback
getArchivedProjectDocs: (project_id, callback)->
  # Find the docs of a project that carry `inS3: true`.
  # `callback` is passed straight to db.docs.find.
  archivedDocsQuery = {project_id: ObjectId(project_id.toString()), inS3: true}
  db.docs.find archivedDocsQuery, {}, callback
upsertIntoDocCollection: (project_id, doc_id, lines, callback)->
  # Create or update a doc: store its lines and project, bump `rev` by one
  # and clear the `inS3` flag. `callback` is passed to db.docs.update.
  update =
    $set:
      lines: lines
      project_id: ObjectId(project_id)
    $inc:
      rev: 1 #on new docs being created this will set the rev to 1
    $unset:
      inS3: true
  db.docs.update {_id: ObjectId(doc_id)}, update, {upsert: true}, callback
@ -25,3 +33,15 @@ module.exports = MongoManager =
update.$set["deleted"] = true
db.docs.update _id: ObjectId(doc_id), update, (err)->
callback(err)
markDocAsArchived: (doc_id, rev, callback)->
  # Flag a doc as stored in S3 and drop its lines from Mongo. The update
  # only matches a doc whose `rev` equals the given `rev`.
  # `callback` is called with (err) only.
  archivedState =
    $set: {inS3: true}
    $unset: {lines: true}
  matchGivenRev = {_id: doc_id, rev: rev}
  db.docs.update matchGivenRev, archivedState, (updateErr)->
    callback(updateErr)

View file

@ -9,3 +9,9 @@ module.exports =
mongo:
url: 'mongodb://127.0.0.1/sharelatex'
#docstore:
# s3:
# key: ""
# secret: ""
# bucket: "something"

View file

@ -0,0 +1,152 @@
sinon = require "sinon"
chai = require("chai")
should = chai.should()
{db, ObjectId} = require "../../../app/js/mongojs"
async = require "async"
Settings = require("settings-sharelatex")
DocstoreClient = require "./helpers/DocstoreClient"
if Settings.filestore?.backend == "s3"
describe "Archiving all docs", ->
beforeEach (done) ->
  # Create three docs for a fresh project, then update each once so that
  # every doc has been modified after creation.
  @callback = sinon.stub()
  @project_id = ObjectId()
  @docs = [{
    _id: ObjectId()
    lines: ["one", "two", "three"]
    rev: 2
  }, {
    _id: ObjectId()
    lines: ["aaa", "bbb", "ccc"]
    rev: 4
  }, {
    _id: ObjectId()
    lines: ["111", "222", "333"]
    rev: 6
  }]
  jobs = for doc in @docs
    do (doc) =>
      (callback) =>
        DocstoreClient.createDoc @project_id, doc._id, doc.lines, (err)=>
          # Fail the setup instead of updating a doc that was never created.
          return callback(err) if err?
          doc.lines[0] = doc.lines[0]+" added"
          DocstoreClient.updateDoc @project_id, doc._id, doc.lines, callback
  async.series jobs, done
afterEach (done) ->
  # Remove the docs created for this test's project.
  projectDocs = {project_id: @project_id}
  db.docs.remove projectDocs, done
describe "Archiving all docs", ->
  # Archive once, then check the HTTP status, the Mongo state and the S3 copy.
  beforeEach (done) ->
    DocstoreClient.archiveAllDoc @project_id, (error, @res) =>
      done()

  it "should archive all the docs", (done) ->
    @res.statusCode.should.equal 204
    done()

  it "should set inS3 and unset lines in each doc", (done) ->
    checks = @docs.map (archiveDoc) =>
      (next) =>
        db.docs.findOne {_id: archiveDoc._id}, (error, doc) =>
          should.not.exist doc.lines
          doc.inS3.should.equal true
          next()
    async.series checks, done

  it "should be able get the same docs back", (done) ->
    checks = @docs.map (archiveDoc) =>
      (next) =>
        DocstoreClient.getS3Doc @project_id, archiveDoc._id, (error, res, doc) =>
          doc.toString().should.equal archiveDoc.lines.toString()
          next()
    async.series checks, done
describe "Arching all docs twice", ->
  # Archiving an already archived project must succeed and leave the same state.
  beforeEach (done) ->
    DocstoreClient.archiveAllDoc @project_id, (error, @res) =>
      @res.statusCode.should.equal 204
      DocstoreClient.archiveAllDoc @project_id, (error, @res) =>
        @res.statusCode.should.equal 204
        done()

  it "should archive all the docs", (done) ->
    @res.statusCode.should.equal 204
    done()

  it "should set inS3 and unset lines in each doc", (done) ->
    checks = @docs.map (archiveDoc) =>
      (next) =>
        db.docs.findOne {_id: archiveDoc._id}, (error, doc) =>
          should.not.exist doc.lines
          doc.inS3.should.equal true
          next()
    async.series checks, done

  it "should be able get the same docs back", (done) ->
    checks = @docs.map (archiveDoc) =>
      (next) =>
        DocstoreClient.getS3Doc @project_id, archiveDoc._id, (error, res, doc) =>
          doc.toString().should.equal archiveDoc.lines.toString()
          next()
    async.series checks, done
describe "archiving massive document", ->
  # Replace the second doc with four large random lines, then archive.
  # The describe callback takes no `done` argument, and the leftover debug
  # console.log has been removed.
  beforeEach (done)->
    @timeout 1000 * 30
    quarterMegInBytes = 250000
    lines = require("crypto").randomBytes(quarterMegInBytes).toString("hex")
    @docs[1].lines = [lines,lines,lines,lines]
    DocstoreClient.updateDoc @project_id, @docs[1]._id, @docs[1].lines, =>
      DocstoreClient.archiveAllDoc @project_id, (error, @res) =>
        done()

  it "should archive all the docs", (done) ->
    @res.statusCode.should.equal 204
    done()

  it "should set inS3 and unset lines in each doc", (done) ->
    jobs = for archiveDoc in @docs
      do (archiveDoc) =>
        (callback) =>
          db.docs.findOne _id: archiveDoc._id, (error, doc) =>
            should.not.exist doc.lines
            doc.inS3.should.equal true
            callback()
    async.series jobs, done

  it "should be able get the same docs back", (done) ->
    jobs = for archiveDoc in @docs
      do (archiveDoc) =>
        (callback) =>
          DocstoreClient.getS3Doc @project_id, archiveDoc._id, (error, res, doc) =>
            doc.toString().should.equal archiveDoc.lines.toString()
            callback()
    async.series jobs, done
describe "Unarchiving all docs", ->
  # Archive the project, then fetch all docs over HTTP and expect the
  # original lines back.
  it "should unarchive all the docs", (done) ->
    DocstoreClient.archiveAllDoc @project_id, (error, res) =>
      DocstoreClient.getAllDocs @project_id, (error, res, docs) =>
        if error?
          throw error
        docs.length.should.equal @docs.length
        docs.forEach (doc, i) =>
          doc.lines.should.deep.equal @docs[i].lines
        done()

View file

@ -1,5 +1,7 @@
request = require("request").defaults(jar: false)
{db, ObjectId} = require("../../../../app/js/mongojs")
settings = require("settings-sharelatex")
DocArchiveManager = require("../../../../app/js/DocArchiveManager.js")
module.exports = DocstoreClient =
@ -16,28 +18,34 @@ module.exports = DocstoreClient =
getDoc: (project_id, doc_id, qs, callback = (error, res, body) ->) ->
request.get {
url: "http://localhost:3016/project/#{project_id}/doc/#{doc_id}"
url: "http://localhost:#{settings.internal.docstore.port}/project/#{project_id}/doc/#{doc_id}"
json: true
qs:qs
}, callback
getAllDocs: (project_id, callback = (error, res, body) ->) ->
request.get {
url: "http://localhost:3016/project/#{project_id}/doc"
url: "http://localhost:#{settings.internal.docstore.port}/project/#{project_id}/doc"
json: true
}, callback
updateDoc: (project_id, doc_id, lines, callback = (error, res, body) ->) ->
request.post {
url: "http://localhost:3016/project/#{project_id}/doc/#{doc_id}"
url: "http://localhost:#{settings.internal.docstore.port}/project/#{project_id}/doc/#{doc_id}"
json:
lines: lines
}, callback
deleteDoc: (project_id, doc_id, callback = (error, res, body) ->) ->
request.del {
url: "http://localhost:3016/project/#{project_id}/doc/#{doc_id}"
url: "http://localhost:#{settings.internal.docstore.port}/project/#{project_id}/doc/#{doc_id}"
}, callback
archiveAllDoc: (project_id, callback = (error, res, body) ->) ->
  # GET the docstore archive route for a project.
  archiveUrl = "http://localhost:#{settings.internal.docstore.port}/project/#{project_id}/archive"
  request.get {url: archiveUrl}, callback
getS3Doc: (project_id, doc_id, callback = (error, res, body) ->) ->
  # Fetch a doc directly from S3, using the same request options the
  # archive manager builds for the key "<project_id>/<doc_id>".
  s3Key = "#{project_id}/#{doc_id}"
  request.get DocArchiveManager.buildS3Options(true, s3Key), callback

View file

@ -0,0 +1,179 @@
assert = require("chai").assert
sinon = require('sinon')
chai = require('chai')
should = chai.should()
expect = chai.expect
modulePath = "../../../app/js/DocArchiveManager.js"
SandboxedModule = require('sandboxed-module')
ObjectId = require("mongojs").ObjectId
Errors = require "../../../app/js/Errors"
crypto = require("crypto")
describe "DocArchiveManager", ->
beforeEach ->
  # Fresh settings, fixtures and stubs for every test; the module under
  # test is loaded with all of its collaborators replaced.
  noop = ->
  @settings =
    docstore:
      s3: {secret: "secret", key: "this_key", bucket: "doc-archive-unit-test"}

  @request = {put: {}, get: {}, del: {}}

  # Three docs that are all flagged as archived.
  @archivedDocs = [
    {_id: ObjectId(), inS3: true, rev: 2}
    {_id: ObjectId(), inS3: true, rev: 4}
    {_id: ObjectId(), inS3: true, rev: 6}
  ]

  # A mix of docs with lines (indexes 0, 1, 4) and archived docs (2, 3).
  @mongoDocs = [
    {_id: ObjectId(), lines: ["one", "two", "three"], rev: 2}
    {_id: ObjectId(), lines: ["aaa", "bbb", "ccc"], rev: 4}
    {_id: ObjectId(), inS3: true, rev: 6}
    {_id: ObjectId(), inS3: true, rev: 6}
    {_id: ObjectId(), lines: ["111", "222", "333"], rev: 6}
  ]

  @MongoManager =
    markDocAsArchived: sinon.stub().callsArgWith(2, null)
    upsertIntoDocCollection: sinon.stub().callsArgWith(3, null)
    getProjectsDocs: sinon.stub().callsArgWith(1, null, @mongoDocs)
    getArchivedProjectDocs: sinon.stub().callsArgWith(1, null, @mongoDocs)

  @requires =
    "settings-sharelatex": @settings
    "./MongoManager": @MongoManager
    "request": @request
    "logger-sharelatex": {log: noop, err: noop}

  @error = "my errror"
  @project_id = ObjectId().toString()
  @stubbedError = new Errors.NotFoundError("Error in S3 request")
  @DocArchiveManager = SandboxedModule.require modulePath, requires: @requires
describe "archiveDoc", ->
  # Covers the request options sent to S3, the md5 check and error reporting.

  it "should use correct options", (done)->
    s3Response = {statusCode:200,headers:{etag:""}}
    @request.put = sinon.stub().callsArgWith(1, null, s3Response)
    @DocArchiveManager.archiveDoc @project_id, @mongoDocs[0], (err)=>
      putOptions = @request.put.args[0][0]
      s3Settings = @settings.docstore.s3
      assert.deepEqual(putOptions.aws, {key:s3Settings.key, secret:s3Settings.secret, bucket:s3Settings.bucket})
      putOptions.json.should.equal @mongoDocs[0].lines
      putOptions.timeout.should.equal (30*1000)
      putOptions.uri.should.equal "https://#{s3Settings.bucket}.s3.amazonaws.com/#{@project_id}/#{@mongoDocs[0]._id}"
      done()

  it "should return no md5 error", (done)->
    serialisedLines = JSON.stringify(@mongoDocs[0].lines)
    @md5 = crypto.createHash("md5").update(serialisedLines).digest("hex")
    @request.put = sinon.stub().callsArgWith(1, null, {statusCode:200,headers:{etag:@md5}})
    @DocArchiveManager.archiveDoc @project_id, @mongoDocs[0], (err)=>
      should.not.exist err
      done()

  it "should return the error", (done)->
    failedResponse = {statusCode:400,headers:{etag:""}}
    @request.put = sinon.stub().callsArgWith(1, @stubbedError, failedResponse)
    @DocArchiveManager.archiveDoc @project_id, @mongoDocs[0], (err)=>
      should.exist err
      done()
describe "unarchiveDoc", ->
  # Covers the request options used for the S3 GET and error reporting.

  it "should use correct options", (done)->
    @request.get = sinon.stub().callsArgWith(1, null, statusCode:200, @mongoDocs[0].lines)
    @request.del = sinon.stub().callsArgWith(1, null, statusCode:204, {})
    @DocArchiveManager.unarchiveDoc @project_id, @mongoDocs[0]._id, (err)=>
      opts = @request.get.args[0][0]
      assert.deepEqual(opts.aws, {key:@settings.docstore.s3.key, secret:@settings.docstore.s3.secret, bucket:@settings.docstore.s3.bucket})
      opts.json.should.equal true
      opts.timeout.should.equal (30*1000)
      opts.uri.should.equal "https://#{@settings.docstore.s3.bucket}.s3.amazonaws.com/#{@project_id}/#{@mongoDocs[0]._id}"
      done()

  it "should return the error", (done)->
    @request.get = sinon.stub().callsArgWith(1, @stubbedError, {}, {})
    # unarchiveDoc takes a doc id, so pass `_id` rather than the whole doc.
    @DocArchiveManager.unarchiveDoc @project_id, @mongoDocs[0]._id, (err)=>
      should.exist err
      done()
describe "archiveAllDocs", ->
  # archiveDoc is stubbed, so these tests only cover which docs are selected
  # and how errors from Mongo are reported.

  it "should archive all project docs which are not in s3", (done)->
    @MongoManager.getProjectsDocs = sinon.stub().callsArgWith(1, null, @mongoDocs)
    @DocArchiveManager.archiveDoc = sinon.stub().callsArgWith(2, null)
    @DocArchiveManager.archiveAllDocs @project_id, (err)=>
      # Docs 0, 1 and 4 have lines; docs 2 and 3 are already flagged inS3.
      for index in [0, 1, 4]
        @DocArchiveManager.archiveDoc.calledWith(@project_id, @mongoDocs[index]).should.equal true
      for index in [2, 3]
        @DocArchiveManager.archiveDoc.calledWith(@project_id, @mongoDocs[index]).should.equal false
      should.not.exist err
      done()

  it "should return error if have no docs", (done)->
    @MongoManager.getProjectsDocs = sinon.stub().callsArgWith(1, null, null)
    @DocArchiveManager.archiveAllDocs @project_id, (err)=>
      should.exist err
      done()

  it "should return the error", (done)->
    @MongoManager.getProjectsDocs = sinon.stub().callsArgWith(1, @error, null)
    @DocArchiveManager.archiveAllDocs @project_id, (err)=>
      err.should.equal @error
      done()
describe "unArchiveAllDocs", ->
  # unarchiveDoc is stubbed, so these tests only cover which docs are
  # selected and how errors from Mongo are reported.

  it "should unarchive all inS3 docs", (done)->
    @MongoManager.getArchivedProjectDocs = sinon.stub().callsArgWith(1, null, @archivedDocs)
    @DocArchiveManager.unarchiveDoc = sinon.stub().callsArgWith(2, null)
    @DocArchiveManager.unArchiveAllDocs @project_id, (err)=>
      @archivedDocs.forEach (doc) =>
        @DocArchiveManager.unarchiveDoc.calledWith(@project_id, doc._id).should.equal true
      should.not.exist err
      done()

  it "should return error if have no docs", (done)->
    @MongoManager.getArchivedProjectDocs = sinon.stub().callsArgWith(1, null, null)
    @DocArchiveManager.unArchiveAllDocs @project_id, (err)=>
      should.exist err
      done()

  it "should return the error", (done)->
    @MongoManager.getArchivedProjectDocs = sinon.stub().callsArgWith(1, @error, null)
    @DocArchiveManager.unArchiveAllDocs @project_id, (err)=>
      err.should.equal @error
      done()

View file

@ -11,6 +11,7 @@ describe "DocManager", ->
beforeEach ->
@DocManager = SandboxedModule.require modulePath, requires:
"./MongoManager": @MongoManager = {}
"./DocArchiveManager": @DocArchiveManager = {}
"logger-sharelatex": @logger =
log: sinon.stub()
warn:->
@ -74,6 +75,7 @@ describe "DocManager", ->
beforeEach ->
@docs = [{ _id: @doc_id, lines: ["mock-lines"] }]
@MongoManager.getProjectsDocs = sinon.stub().callsArgWith(1, null, @docs)
@DocArchiveManager.unArchiveAllDocs = sinon.stub().callsArgWith(1, null, @docs)
@DocManager.getAllDocs @project_id, @callback
it "should get the project from the database", ->
@ -87,6 +89,7 @@ describe "DocManager", ->
describe "when there are no docs for the project", ->
beforeEach ->
@MongoManager.getProjectsDocs = sinon.stub().callsArgWith(1, null, null)
@DocArchiveManager.unArchiveAllDocs = sinon.stub().callsArgWith(1, null, null)
@DocManager.getAllDocs @project_id, @callback
it "should return a NotFoundError", ->

View file

@ -11,6 +11,7 @@ describe "HttpController", ->
beforeEach ->
@HttpController = SandboxedModule.require modulePath, requires:
"./DocManager": @DocManager = {}
"./DocArchiveManager": @DocArchiveManager = {}
"logger-sharelatex": @logger = { log: sinon.stub(), error: sinon.stub() }
@res = { send: sinon.stub(), json: sinon.stub(), setHeader:sinon.stub() }
@req = { query:{}}
@ -247,3 +248,20 @@ describe "HttpController", ->
@res.send
.calledWith(204)
.should.equal true
describe "archiveAllDocs", ->
  # The archive manager is stubbed to succeed; the handler should call it
  # with the route's project id and answer 204.
  beforeEach ->
    @req.params = {project_id: @project_id}
    @DocArchiveManager.archiveAllDocs = sinon.stub().callsArg(1)
    @HttpController.archiveAllDocs @req, @res, @next

  it "should archive the project", ->
    wasCalledForProject = @DocArchiveManager.archiveAllDocs.calledWith(@project_id)
    wasCalledForProject.should.equal true

  it "should return a 204 (No Content)", ->
    sentNoContent = @res.send.calledWith(204)
    sentNoContent.should.equal true