Archive ranges as well as doc lines

This commit is contained in:
James Allen 2017-03-30 17:13:43 +01:00
parent 315a1d5e80
commit a9ab7b739d
6 changed files with 436 additions and 153 deletions

View file

@ -6,12 +6,13 @@ async = require "async"
settings = require("settings-sharelatex")
request = require("request")
crypto = require("crypto")
RangeManager = require("./RangeManager")
thirtySeconds = 30 * 1000
module.exports = DocArchive =
archiveAllDocs: (project_id, callback = (err, docs) ->) ->
MongoManager.getProjectsDocs project_id, {include_deleted: true}, {lines: true, rev: true, inS3: true}, (err, docs) ->
MongoManager.getProjectsDocs project_id, {include_deleted: true}, {lines: true, ranges: true, rev: true, inS3: true}, (err, docs) ->
if err?
return callback(err)
else if !docs?
@ -26,21 +27,26 @@ module.exports = DocArchive =
# Archive a single doc to S3: PUT it with `request`, compare an MD5 of what was
# sent against the ETag header of the response, and on a match mark the doc as
# archived via MongoManager.markDocAsArchived. Any failure is passed to `callback`.
# NOTE(review): this span comes from a diff view with the +/- markers and
# indentation stripped, so two versions of several statements sit side by side
# (the two buildS3Options calls, the two request.put bodies). Presumably the
# variant that goes through DocArchive._mongoDocToS3Doc is the current one —
# confirm against the repository.
archiveDoc: (project_id, doc, callback)->
logger.log project_id: project_id, doc_id: doc._id, "sending doc to s3"
try
options = DocArchive.buildS3Options(doc.lines, project_id+"/"+doc._id)
options = DocArchive.buildS3Options(project_id+"/"+doc._id)
catch e
return callback e
request.put options, (err, res)->
if err? || res.statusCode != 200
logger.err err:err, res:res, project_id:project_id, doc_id: doc._id, statusCode: res?.statusCode, "something went wrong archiving doc in aws"
return callback new Error("Error in S3 request")
md5lines = crypto.createHash("md5").update(JSON.stringify(doc.lines), "utf8").digest("hex")
md5response = res.headers.etag.toString().replace(/\"/g, '')
if md5lines != md5response
logger.err responseMD5:md5response, linesMD5:md5lines, project_id:project_id, doc_id: doc?._id, "err in response md5 from s3"
return callback new Error("Error in S3 md5 response")
MongoManager.markDocAsArchived doc._id, doc.rev, (err) ->
return callback(err) if err?
callback()
# Variant that serialises the doc with _mongoDocToS3Doc and sends the resulting
# JSON string as the request body; the MD5 is then taken over that same string.
DocArchive._mongoDocToS3Doc doc, (error, json_doc) ->
return callback(error) if error?
options.body = json_doc
options.headers =
'Content-Type': "application/json"
request.put options, (err, res) ->
if err? || res.statusCode != 200
logger.err err:err, res:res, project_id:project_id, doc_id: doc._id, statusCode: res?.statusCode, "something went wrong archiving doc in aws"
return callback new Error("Error in S3 request")
md5lines = crypto.createHash("md5").update(json_doc, "utf8").digest("hex")
md5response = res.headers.etag.toString().replace(/\"/g, '')
if md5lines != md5response
logger.err responseMD5:md5response, linesMD5:md5lines, project_id:project_id, doc_id: doc?._id, "err in response md5 from s3"
return callback new Error("Error in S3 md5 response")
MongoManager.markDocAsArchived doc._id, doc.rev, (err) ->
return callback(err) if err?
callback()
unArchiveAllDocs: (project_id, callback = (err) ->) ->
MongoManager.getArchivedProjectDocs project_id, (err, docs) ->
@ -60,23 +66,50 @@ module.exports = DocArchive =
# Restore a single doc from S3: GET it with `request`, upsert the result into the
# doc collection via MongoManager.upsertIntoDocCollection, then DELETE the S3
# object (a 204 response is expected). Non-200 GETs and non-204 DELETEs are
# reported to `callback` as Errors.NotFoundError.
# NOTE(review): this span comes from a diff view with the +/- markers and
# indentation stripped, so two versions of several statements sit side by side
# (the two buildS3Options calls, the two request.get lines, the two upsert
# blocks). Presumably the variant that goes through DocArchive._s3DocToMongoDoc
# is the current one — confirm against the repository.
unarchiveDoc: (project_id, doc_id, callback)->
logger.log project_id: project_id, doc_id: doc_id, "getting doc from s3"
try
options = DocArchive.buildS3Options(true, project_id+"/"+doc_id)
options = DocArchive.buildS3Options(project_id+"/"+doc_id)
catch e
return callback e
request.get options, (err, res, lines)->
options.json = true
request.get options, (err, res, doc)->
if err? || res.statusCode != 200
logger.err err:err, res:res, project_id:project_id, doc_id:doc_id, "something went wrong unarchiving doc from aws"
return callback new Errors.NotFoundError("Error in S3 request")
MongoManager.upsertIntoDocCollection project_id, doc_id.toString(), {lines}, (err) ->
return callback(err) if err?
logger.log project_id: project_id, doc_id: doc_id, "deleting doc from s3"
request.del options, (err, res, body)->
if err? || res.statusCode != 204
logger.err err:err, res:res, project_id:project_id, doc_id:doc_id, "something went wrong deleting doc from aws"
return callback new Errors.NotFoundError("Error in S3 request")
callback()
# Variant that first converts the fetched body with _s3DocToMongoDoc and upserts
# the converted fields instead of a bare {lines} object.
DocArchive._s3DocToMongoDoc doc, (error, mongo_doc) ->
return callback(error) if error?
MongoManager.upsertIntoDocCollection project_id, doc_id.toString(), mongo_doc, (err) ->
return callback(err) if err?
logger.log project_id: project_id, doc_id: doc_id, "deleting doc from s3"
request.del options, (err, res, body)->
if err? || res.statusCode != 204
logger.err err:err, res:res, project_id:project_id, doc_id:doc_id, "something went wrong deleting doc from aws"
return callback new Errors.NotFoundError("Error in S3 request")
callback()
# Convert a doc body fetched from S3 into the fields to write back to Mongo.
#
# Two stored shapes are recognised:
#   * an object with `schema_v == 1` and `lines`: `lines` is copied across and
#     `ranges`, when present, is passed through RangeManager.jsonRangesToMongo;
#   * a bare array, which is treated as the doc's lines.
#
# Calls back with (null, mongo_doc) on success. Anything else — including a
# null/undefined body — calls back with an Error rather than throwing.
_s3DocToMongoDoc: (doc, callback = (error, mongo_doc) ->) ->
	mongo_doc = {}
	# `doc?` guards against an empty S3 body; a plain `doc.schema_v` would throw
	# a TypeError here instead of reaching the error callback below.
	if doc?.schema_v == 1 and doc.lines?
		mongo_doc.lines = doc.lines
		if doc.ranges?
			mongo_doc.ranges = RangeManager.jsonRangesToMongo(doc.ranges)
	# Array.isArray also recognises arrays created in another JS context,
	# which `instanceof Array` does not.
	else if Array.isArray(doc)
		mongo_doc.lines = doc
	else
		return callback(new Error("I don't understand the doc format in s3"))
	return callback null, mongo_doc
buildS3Options: (content, key)->
# Serialise a Mongo doc into the JSON string that is stored in S3.
#
# The payload holds the doc's `lines` and `ranges` plus a `schema_v: 1` marker.
# Calls back with an Error if the serialised string contains a U+0000
# character, otherwise with (null, json).
_mongoDocToS3Doc: (doc, callback = (error, s3_doc) ->) ->
	json = JSON.stringify({
		lines: doc.lines
		ranges: doc.ranges
		schema_v: 1
	})
	# NOTE(review): JSON.stringify escapes U+0000 inside strings as the
	# six-character sequence \u0000, so this raw-character check may never
	# match — confirm whether the escaped form should be detected instead.
	if json.indexOf("\u0000") != -1
		error = new Error("null bytes detected")
		# Only `doc` is in scope in this helper, so log its id; referencing
		# project_id/doc_id here would raise a ReferenceError. Uses logger.err
		# to match the other log calls in this module.
		logger.err {err: error, doc_id: doc._id}, error.message
		return callback(error)
	return callback null, json
buildS3Options: (key)->
if !settings.docstore.s3?
throw new Error("S3 settings are not configured")
return {
@ -85,6 +118,5 @@ module.exports = DocArchive =
secret: settings.docstore.s3.secret
bucket: settings.docstore.s3.bucket
timeout: thirtySeconds
json: content
uri:"https://#{settings.docstore.s3.bucket}.s3.amazonaws.com/#{key}"
}

View file

@ -42,6 +42,7 @@ module.exports = MongoManager =
$unset: {}
update.$set["inS3"] = true
update.$unset["lines"] = true
update.$unset["ranges"] = true
query =
_id: doc_id
rev: rev

View file

@ -16,16 +16,21 @@ module.exports = RangeManager =
# Convert a JSON-decoded `ranges` object into its Mongo representation, in place:
# change/comment ids, comment `op.t` and metadata `user_id` go through
# _safeObjectId, and metadata `ts` is turned into a Date. Returns null when
# `ranges` is absent, otherwise the same (mutated) `ranges` object.
# NOTE(review): this span comes from a diff view with the +/- markers and
# indentation stripped, so two versions of several statements sit side by side
# (`@_safeObjectId` vs `RangeManager._safeObjectId`, inline metadata handling vs
# the `updateMetadata` helper) — confirm which lines are live against the
# repository.
jsonRangesToMongo: (ranges) ->
return null if !ranges?
# Helper: normalise the `ts` and `user_id` fields of a metadata object, if present.
updateMetadata = (metadata) ->
if metadata?.ts?
metadata.ts = new Date(metadata.ts)
if metadata?.user_id?
metadata.user_id = RangeManager._safeObjectId(metadata.user_id)
for change in ranges.changes or []
change.id = @_safeObjectId(change.id)
if change.metadata?.ts?
change.metadata.ts = new Date(change.metadata.ts)
if change.metadata?.user_id?
change.metadata.user_id = @_safeObjectId(change.metadata.user_id)
change.id = RangeManager._safeObjectId(change.id)
updateMetadata(change.metadata)
for comment in ranges.comments or []
comment.id = @_safeObjectId(comment.id)
comment.id = RangeManager._safeObjectId(comment.id)
if comment.op?.t?
comment.op.t = @_safeObjectId(comment.op.t)
comment.op.t = RangeManager._safeObjectId(comment.op.t)
updateMetadata(comment.metadata)
return ranges
_safeObjectId: (data) ->

File diff suppressed because one or more lines are too long

View file

@ -48,5 +48,6 @@ module.exports = DocstoreClient =
# Test-client helper: fetch the S3 object stored under "<project_id>/<doc_id>",
# using options from DocArchiveManager.buildS3Options, and hand request's
# (error, res, body) straight to `callback`.
# NOTE(review): this span comes from a diff view with the +/- markers stripped;
# the first buildS3Options/request.get pair and the second one (which sets
# `options.json = true` separately) look like the before and after versions of
# the same code — confirm against the repository.
getS3Doc: (project_id, doc_id, callback = (error, res, body) ->) ->
options = DocArchiveManager.buildS3Options(true, project_id+"/"+doc_id)
request.get options, callback
options = DocArchiveManager.buildS3Options(project_id+"/"+doc_id)
options.json = true
request.get options, callback

View file

@ -71,6 +71,7 @@ describe "DocArchiveManager", ->
"settings-sharelatex": @settings
"./MongoManager": @MongoManager
"request": @request
"./RangeManager": @RangeManager = {}
"logger-sharelatex":
log:->
err:->
@ -87,13 +88,22 @@ describe "DocArchiveManager", ->
@DocArchiveManager.archiveDoc @project_id, @mongoDocs[0], (err)=>
opts = @request.put.args[0][0]
assert.deepEqual(opts.aws, {key:@settings.docstore.s3.key, secret:@settings.docstore.s3.secret, bucket:@settings.docstore.s3.bucket})
opts.json.should.equal @mongoDocs[0].lines
opts.body.should.equal JSON.stringify(
lines: @mongoDocs[0].lines
ranges: @mongoDocs[0].ranges
schema_v: 1
)
opts.timeout.should.equal (30*1000)
opts.uri.should.equal "https://#{@settings.docstore.s3.bucket}.s3.amazonaws.com/#{@project_id}/#{@mongoDocs[0]._id}"
done()
it "should return no md5 error", (done)->
@md5 = crypto.createHash("md5").update(JSON.stringify(@mongoDocs[0].lines)).digest("hex")
data = JSON.stringify(
lines: @mongoDocs[0].lines
ranges: @mongoDocs[0].ranges
schema_v: 1
)
@md5 = crypto.createHash("md5").update(data).digest("hex")
@request.put = sinon.stub().callsArgWith(1, null, {statusCode:200,headers:{etag:@md5}})
@DocArchiveManager.archiveDoc @project_id, @mongoDocs[0], (err)=>
should.not.exist err