Merge pull request #22 from sharelatex/ja-archive-ranges

Archive ranges as well as doc lines
James Allen 2017-05-17 12:02:42 +01:00 committed by GitHub
commit e10d09d040
6 changed files with 515 additions and 165 deletions
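
With this change a doc is archived to S3 as a JSON object that carries its ranges alongside its lines and is tagged with a schema version, instead of a bare JSON array of lines; unarchiving still accepts both formats. A rough sketch of the new payload written by _mongoDocToS3Doc (values are illustrative only):

    # old format: the S3 body was just the lines array, e.g. ["line one", "line two"]
    # new format (schema_v 1): lines plus ranges in a single JSON document
    s3_doc =
      lines: ["line one", "line two"]
      ranges: { changes: [], comments: [] }
      schema_v: 1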


@@ -6,12 +6,13 @@ async = require "async"
 settings = require("settings-sharelatex")
 request = require("request")
 crypto = require("crypto")
+RangeManager = require("./RangeManager")
 thirtySeconds = 30 * 1000
 
 module.exports = DocArchive =
 
   archiveAllDocs: (project_id, callback = (err, docs) ->) ->
-    MongoManager.getProjectsDocs project_id, {include_deleted: true}, {lines: true, rev: true, inS3: true}, (err, docs) ->
+    MongoManager.getProjectsDocs project_id, {include_deleted: true}, {lines: true, ranges: true, rev: true, inS3: true}, (err, docs) ->
       if err?
         return callback(err)
       else if !docs?
@@ -26,21 +27,26 @@ module.exports = DocArchive =
   archiveDoc: (project_id, doc, callback)->
     logger.log project_id: project_id, doc_id: doc._id, "sending doc to s3"
     try
-      options = DocArchive.buildS3Options(doc.lines, project_id+"/"+doc._id)
+      options = DocArchive.buildS3Options(project_id+"/"+doc._id)
     catch e
       return callback e
-    request.put options, (err, res)->
-      if err? || res.statusCode != 200
-        logger.err err:err, res:res, project_id:project_id, doc_id: doc._id, statusCode: res?.statusCode, "something went wrong archiving doc in aws"
-        return callback new Error("Error in S3 request")
-      md5lines = crypto.createHash("md5").update(JSON.stringify(doc.lines), "utf8").digest("hex")
-      md5response = res.headers.etag.toString().replace(/\"/g, '')
-      if md5lines != md5response
-        logger.err responseMD5:md5response, linesMD5:md5lines, project_id:project_id, doc_id: doc?._id, "err in response md5 from s3"
-        return callback new Error("Error in S3 md5 response")
-      MongoManager.markDocAsArchived doc._id, doc.rev, (err) ->
-        return callback(err) if err?
-        callback()
+    DocArchive._mongoDocToS3Doc doc, (error, json_doc) ->
+      return callback(error) if error?
+      options.body = json_doc
+      options.headers =
+        'Content-Type': "application/json"
+      request.put options, (err, res) ->
+        if err? || res.statusCode != 200
+          logger.err err:err, res:res, project_id:project_id, doc_id: doc._id, statusCode: res?.statusCode, "something went wrong archiving doc in aws"
+          return callback new Error("Error in S3 request")
+        md5lines = crypto.createHash("md5").update(json_doc, "utf8").digest("hex")
+        md5response = res.headers.etag.toString().replace(/\"/g, '')
+        if md5lines != md5response
+          logger.err responseMD5:md5response, linesMD5:md5lines, project_id:project_id, doc_id: doc?._id, "err in response md5 from s3"
+          return callback new Error("Error in S3 md5 response")
+        MongoManager.markDocAsArchived doc._id, doc.rev, (err) ->
+          return callback(err) if err?
+          callback()
 
   unArchiveAllDocs: (project_id, callback = (err) ->) ->
     MongoManager.getArchivedProjectDocs project_id, (err, docs) ->
@@ -60,26 +66,52 @@ module.exports = DocArchive =
   unarchiveDoc: (project_id, doc_id, callback)->
     logger.log project_id: project_id, doc_id: doc_id, "getting doc from s3"
     try
-      options = DocArchive.buildS3Options(true, project_id+"/"+doc_id)
+      options = DocArchive.buildS3Options(project_id+"/"+doc_id)
     catch e
       return callback e
-    request.get options, (err, res, lines)->
+    options.json = true
+    request.get options, (err, res, doc)->
       if err? || res.statusCode != 200
         logger.err err:err, res:res, project_id:project_id, doc_id:doc_id, "something went wrong unarchiving doc from aws"
         return callback new Errors.NotFoundError("Error in S3 request")
-      if !(lines instanceof Array)
-        logger.err err:err, res:res, project_id:project_id, doc_id:doc_id, lines:lines, "doc lines from aws are not in array format, likely not JSON parsable"
-        return callback(new Error("Error unpacking doc"))
-      MongoManager.upsertIntoDocCollection project_id, doc_id.toString(), {lines}, (err) ->
-        return callback(err) if err?
-        logger.log project_id: project_id, doc_id: doc_id, "deleting doc from s3"
-        request.del options, (err, res, body)->
-          if err? || res.statusCode != 204
-            logger.err err:err, res:res, project_id:project_id, doc_id:doc_id, "something went wrong deleting doc from aws"
-            return callback new Errors.NotFoundError("Error in S3 request")
-          callback()
+      DocArchive._s3DocToMongoDoc doc, (error, mongo_doc) ->
+        return callback(error) if error?
+        MongoManager.upsertIntoDocCollection project_id, doc_id.toString(), mongo_doc, (err) ->
+          return callback(err) if err?
+          logger.log project_id: project_id, doc_id: doc_id, "deleting doc from s3"
+          request.del options, (err, res, body)->
+            if err? || res.statusCode != 204
+              logger.err err:err, res:res, project_id:project_id, doc_id:doc_id, "something went wrong deleting doc from aws"
+              return callback new Errors.NotFoundError("Error in S3 request")
+            callback()
 
-  buildS3Options: (content, key)->
+  _s3DocToMongoDoc: (doc, callback = (error, mongo_doc) ->) ->
+    mongo_doc = {}
+    if doc.schema_v == 1 and doc.lines?
+      mongo_doc.lines = doc.lines
+      if doc.ranges?
+        mongo_doc.ranges = RangeManager.jsonRangesToMongo(doc.ranges)
+    else if doc instanceof Array
+      mongo_doc.lines = doc
+    else
+      return callback(new Error("I don't understand the doc format in s3"))
+    return callback null, mongo_doc
+
+  _mongoDocToS3Doc: (doc, callback = (error, s3_doc) ->) ->
+    if !doc.lines?
+      return callback(new Error("doc has no lines"))
+    json = JSON.stringify({
+      lines: doc.lines
+      ranges: doc.ranges
+      schema_v: 1
+    })
+    if json.indexOf("\u0000") != -1
+      error = new Error("null bytes detected")
+      logger.err {err: error, doc, json}, error.message
+      return callback(error)
+    return callback null, json
+
+  buildS3Options: (key)->
     if !settings.docstore.s3?
       throw new Error("S3 settings are not configured")
     return {
@@ -88,6 +120,5 @@ module.exports = DocArchive =
         secret: settings.docstore.s3.secret
         bucket: settings.docstore.s3.bucket
       timeout: thirtySeconds
-      json: content
       uri:"https://#{settings.docstore.s3.bucket}.s3.amazonaws.com/#{key}"
     }
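
The two helpers added above are intended to round-trip: _mongoDocToS3Doc serialises a Mongo doc for upload, and _s3DocToMongoDoc turns the downloaded body (either the new object or the legacy lines array) back into an update for Mongo. A minimal sketch of that round trip, not part of the diff itself:

    DocArchive._mongoDocToS3Doc {lines: ["a"], ranges: {}}, (error, json_doc) ->
      # json_doc is the string '{"lines":["a"],"ranges":{},"schema_v":1}'
      DocArchive._s3DocToMongoDoc JSON.parse(json_doc), (error, mongo_doc) ->
        # mongo_doc again has lines (and converted ranges), ready for upsertIntoDocCollection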


@@ -44,6 +44,7 @@ module.exports = MongoManager =
       $unset: {}
     update.$set["inS3"] = true
     update.$unset["lines"] = true
+    update.$unset["ranges"] = true
     query =
       _id: doc_id
       rev: rev
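
markDocAsArchived now clears the ranges as well as the lines when it flags a doc as living in S3. The update it builds looks roughly like this (the surrounding db call sits outside the hunk shown above):

    update =
      $set: { inS3: true }
      $unset: { lines: true, ranges: true }
    query =
      _id: doc_id
      rev: rev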


@@ -16,16 +16,21 @@ module.exports = RangeManager =
   jsonRangesToMongo: (ranges) ->
     return null if !ranges?
 
+    updateMetadata = (metadata) ->
+      if metadata?.ts?
+        metadata.ts = new Date(metadata.ts)
+      if metadata?.user_id?
+        metadata.user_id = RangeManager._safeObjectId(metadata.user_id)
+
     for change in ranges.changes or []
-      change.id = @_safeObjectId(change.id)
-      if change.metadata?.ts?
-        change.metadata.ts = new Date(change.metadata.ts)
-      if change.metadata?.user_id?
-        change.metadata.user_id = @_safeObjectId(change.metadata.user_id)
+      change.id = RangeManager._safeObjectId(change.id)
+      updateMetadata(change.metadata)
     for comment in ranges.comments or []
-      comment.id = @_safeObjectId(comment.id)
+      comment.id = RangeManager._safeObjectId(comment.id)
       if comment.op?.t?
-        comment.op.t = @_safeObjectId(comment.op.t)
+        comment.op.t = RangeManager._safeObjectId(comment.op.t)
+      updateMetadata(comment.metadata)
     return ranges
 
   _safeObjectId: (data) ->
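
jsonRangesToMongo, now sharing a single updateMetadata helper, converts the string ids of tracked changes and comments (and of comment threads via op.t) into ObjectIds and metadata timestamps into Date objects, mutating the ranges object in place. An illustrative input, with made-up ids:

    json_ranges =
      changes: [ { id: "aaaaaaaaaaaaaaaaaaaaaaaa", metadata: { user_id: "bbbbbbbbbbbbbbbbbbbbbbbb", ts: "2017-05-17T12:00:00Z" } } ]
      comments: [ { id: "cccccccccccccccccccccccc", op: { t: "dddddddddddddddddddddddd" }, metadata: {} } ]
    # After RangeManager.jsonRangesToMongo(json_ranges), the ids and op.t are ObjectIds
    # and metadata.ts is a Date; the same object is returned.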

File diff suppressed because one or more lines are too long


@@ -48,5 +48,6 @@ module.exports = DocstoreClient =
   getS3Doc: (project_id, doc_id, callback = (error, res, body) ->) ->
-    options = DocArchiveManager.buildS3Options(true, project_id+"/"+doc_id)
-    request.get options, callback
+    options = DocArchiveManager.buildS3Options(project_id+"/"+doc_id)
+    options.json = true
+    request.get options, callback


@@ -71,14 +71,17 @@ describe "DocArchiveManager", ->
       "settings-sharelatex": @settings
       "./MongoManager": @MongoManager
       "request": @request
+      "./RangeManager": @RangeManager = {}
       "logger-sharelatex":
         log:->
         err:->
+    @globals =
+      JSON: JSON
     @error = "my errror"
     @project_id = ObjectId().toString()
     @stubbedError = new Errors.NotFoundError("Error in S3 request")
-    @DocArchiveManager = SandboxedModule.require modulePath, requires: @requires
+    @DocArchiveManager = SandboxedModule.require modulePath, requires: @requires, globals: @globals
 
   describe "archiveDoc", ->
@@ -87,13 +90,22 @@ describe "DocArchiveManager", ->
       @DocArchiveManager.archiveDoc @project_id, @mongoDocs[0], (err)=>
         opts = @request.put.args[0][0]
         assert.deepEqual(opts.aws, {key:@settings.docstore.s3.key, secret:@settings.docstore.s3.secret, bucket:@settings.docstore.s3.bucket})
-        opts.json.should.equal @mongoDocs[0].lines
+        opts.body.should.equal JSON.stringify(
+          lines: @mongoDocs[0].lines
+          ranges: @mongoDocs[0].ranges
+          schema_v: 1
+        )
         opts.timeout.should.equal (30*1000)
         opts.uri.should.equal "https://#{@settings.docstore.s3.bucket}.s3.amazonaws.com/#{@project_id}/#{@mongoDocs[0]._id}"
         done()
 
     it "should return no md5 error", (done)->
-      @md5 = crypto.createHash("md5").update(JSON.stringify(@mongoDocs[0].lines)).digest("hex")
+      data = JSON.stringify(
+        lines: @mongoDocs[0].lines
+        ranges: @mongoDocs[0].ranges
+        schema_v: 1
+      )
+      @md5 = crypto.createHash("md5").update(data).digest("hex")
       @request.put = sinon.stub().callsArgWith(1, null, {statusCode:200,headers:{etag:@md5}})
       @DocArchiveManager.archiveDoc @project_id, @mongoDocs[0], (err)=>
         should.not.exist err
@@ -202,3 +214,82 @@ describe "DocArchiveManager", ->
       @DocArchiveManager.unArchiveAllDocs @project_id, (err)=>
         err.should.equal @error
         done()
+
+  describe "_s3DocToMongoDoc", ->
+    describe "with the old schema", ->
+      it "should return the docs lines", (done) ->
+        @DocArchiveManager._s3DocToMongoDoc ["doc", "lines"], (error, doc) ->
+          expect(doc).to.deep.equal {
+            lines: ["doc", "lines"]
+          }
+          done()
+
+    describe "with the new schema", ->
+      it "should return the doc lines and ranges", (done) ->
+        @RangeManager.jsonRangesToMongo = sinon.stub().returns {"mongo": "ranges"}
+        @DocArchiveManager._s3DocToMongoDoc {
+          lines: ["doc", "lines"]
+          ranges: {"json": "ranges"}
+          schema_v: 1
+        }, (error, doc) ->
+          expect(doc).to.deep.equal {
+            lines: ["doc", "lines"]
+            ranges: {"mongo": "ranges"}
+          }
+          done()
+
+      it "should return just the doc lines when there are no ranges", (done) ->
+        @DocArchiveManager._s3DocToMongoDoc {
+          lines: ["doc", "lines"]
+          schema_v: 1
+        }, (error, doc) ->
+          expect(doc).to.deep.equal {
+            lines: ["doc", "lines"]
+          }
+          done()
+
+    describe "with an unrecognised schema", ->
+      it "should return an error", (done) ->
+        @DocArchiveManager._s3DocToMongoDoc {
+          schema_v: 2
+        }, (error, doc) ->
+          expect(error).to.exist
+          done()
+
+  describe "_mongoDocToS3Doc", ->
+    describe "with a valid doc", ->
+      it "should return the json version", (done) ->
+        @DocArchiveManager._mongoDocToS3Doc doc = {
+          lines: ["doc", "lines"]
+          ranges: { "mock": "ranges" }
+        }, (err, s3_doc) ->
+          expect(s3_doc).to.equal JSON.stringify({
+            lines: ["doc", "lines"]
+            ranges: { "mock": "ranges" }
+            schema_v: 1
+          })
+          done()
+
+    describe "with null bytes in the result", ->
+      beforeEach ->
+        @_stringify = JSON.stringify
+        JSON.stringify = sinon.stub().returns '{"bad": "\u0000"}'
+
+      afterEach ->
+        JSON.stringify = @_stringify
+
+      it "should return an error", (done) ->
+        @DocArchiveManager._mongoDocToS3Doc {
+          lines: ["doc", "lines"]
+          ranges: { "mock": "ranges" }
+        }, (err, s3_doc) ->
+          expect(err).to.exist
+          done()
+
+    describe "without doc lines", ->
+      it "should return an error", (done) ->
+        @DocArchiveManager._mongoDocToS3Doc {}, (err, s3_doc) ->
+          expect(err).to.exist
+          done()